lib/idxd: add low level idxd library

Module, etc., will follow. Notes:

* IDXD is an Intel silicon feature available in future Intel CPUs.
Initial development is being done on a simulator. Once HW is
available and the code is fully tested, the experimental label will be
lifted. The spec can be found here: https://software.intel.com/en-us/download/intel-data-streaming-accelerator-preliminary-architecture-specification

* The current implementation will only work with VFIO (descriptors are
filled with virtual addresses, so the device must sit behind the IOMMU).

* DSA has a number of engines that can be grouped based on application
need, such as the type of memory being served or QoS. Engines are
processing units and are assigned to groups. Work queues are on-device
structures that act as front ends to groups for queueing descriptors.
Full details on what is configurable & how will come in later doc patches.

* There is a finite number of work queue slots that are divided amongst
the desired work queues in some fashion (i.e., evenly).

* SW (outside of the idxd lib) is required to manage flow control so as
not to overrun the work queues. This is provided in the accel plug-in
module. The upper layers use the public API to manage this.

* Work queue submissions are done with a 64-byte atomic instruction
(MOVDIR64B; see lib/idxd/idxd.h below).

* The design here creates a set of descriptor rings per channel that match
the size of the work queues. An spdk_bit_array is then used to make sure
we don't overrun a queue. If no slots are available, the operation is
put on a linked list to be retried later from the poller, as sketched below.
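
A minimal sketch of this submit-or-queue pattern (hedged: the retry
bookkeeping actually lives in the accel plug-in module, and op_ctx /
g_queued_ops are hypothetical names, not part of this patch):

#include "spdk/idxd.h"
#include "spdk/queue.h"

struct op_ctx {
	TAILQ_ENTRY(op_ctx) link;
	void *dst;
	const void *src;
	uint64_t nbytes;
	spdk_idxd_req_cb cb_fn;
	void *cb_arg;
};
TAILQ_HEAD(, op_ctx) g_queued_ops = TAILQ_HEAD_INITIALIZER(g_queued_ops);

static void
submit_or_queue(struct spdk_idxd_io_channel *chan, struct op_ctx *op)
{
	/* spdk_idxd_submit_copy() returns -EBUSY when the bit array shows
	 * no free ring slot, i.e. submitting would overrun the WQ. */
	if (spdk_idxd_submit_copy(chan, op->dst, op->src, op->nbytes,
				  op->cb_fn, op->cb_arg) == -EBUSY) {
		TAILQ_INSERT_TAIL(&g_queued_ops, op, link);
	}
}
/* A poller would re-submit g_queued_ops entries and then reap
 * completions with spdk_idxd_process_events(). */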

* As we need to support any number of channels (we can't limit ourselves
to the number of work queues), we need to dynamically size/resize our
per-channel descriptor rings based on the number of current channels. This
is done from upper layers via public API into the lib.

* As channels are created, the total number of work queue slots is divided
evenly across the channels. Likewise, when channels are destroyed, the
remaining channels will see their ring sizes increase. This is done from
upper layers via public API into the lib (see the worked example below).

* The sim has 64 total work queue entries (WQE) that get doled out to the
work queues (WQ) evenly.
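
Worked example: with g_dev_cfg0 from lib/idxd/idxd.c below (total_wqs = 4),
each WQ gets a wq_size of 64 / 4 = 16 slots; if four channels are then
open on the device, spdk_idxd_reconfigure_chan() resizes each channel's
ring to ring_size / num_channels = 16 / 4 = 4 slots.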

Signed-off-by: paul luse <paul.e.luse@intel.com>
Change-Id: I899bbeda3cef3db05bea4197b8757e89dddb579d
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/1809
Community-CI: Mellanox Build Bot
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Vitaliy Mysak <vitaliy.mysak@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
paul luse 2020-04-10 15:29:01 +00:00 committed by Ben Walker
parent b0b0b8db2b
commit e58e9fbda8
15 changed files with 1848 additions and 2 deletions

CONFIG

@@ -169,3 +169,6 @@ CONFIG_FUSE=n
# Build with RAID5 support
CONFIG_RAID5=n
# Build with IDXD support
CONFIG_IDXD=n

configure

@@ -52,6 +52,8 @@ function usage()
echo " Required on some systems to use qat devices. This flag is"
echo " effective only with the default dpdk submodule."
echo " No path required"
echo " idxd Build the IDXD library and accel framework plug-in module."
echo " Disabled while experimental. Only built for x86 when enabled."
echo " crypto Build vbdev crypto module."
echo " No path required."
echo " fio Build fio_plugin."
@@ -447,6 +449,12 @@ for i in "$@"; do
--without-raid5)
CONFIG[RAID5]=n
;;
--with-idxd)
CONFIG[IDXD]=y
;;
--without-idxd)
CONFIG[IDXD]=n
;;
--)
break
;;
@@ -463,6 +471,16 @@ else
BUILD_CMD=($CC -o /dev/null -x c $CPPFLAGS $CFLAGS $LDFLAGS)
fi
# IDXD uses Intel specific instructions.
if [[ "${CONFIG[IDXD]}" = "y" ]]; then
intel="GenuineIntel"
cpu_vendor=$(grep -i 'vendor' /proc/cpuinfo --max-count=1)
if [[ "$cpu_vendor" != *"$intel"* ]]; then
echo "ERROR: IDXD cannot be used due to CPU incompatiblity."
exit 1
fi
fi
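# Usage note (not part of this patch): on an Intel machine,
#     ./configure --with-idxd
# passes the vendor check above; other CPU vendors fail it by design.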
# Detect architecture and force no ISA-L if non-x86 or non-aarch64 architecture
if [[ "${CONFIG[ISAL]}" = "y" ]]; then
if [[ $arch != x86_64* ]] && [[ $arch != aarch64* ]]; then

include/spdk/env.h

@@ -706,6 +706,13 @@ struct spdk_pci_driver *spdk_pci_vmd_get_driver(void);
*/
struct spdk_pci_driver *spdk_pci_ioat_get_driver(void);
/**
* Get the IDXD PCI driver object.
*
* \return PCI driver.
*/
struct spdk_pci_driver *spdk_pci_idxd_get_driver(void);
/**
* Get the Virtio PCI driver object.
*

include/spdk/idxd.h (new file)

@@ -0,0 +1,207 @@
/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* IDXD DMA engine driver public interface
*/
#ifndef SPDK_IDXD_H
#define SPDK_IDXD_H
#include "spdk/stdinc.h"
#ifdef __cplusplus
extern "C" {
#endif
#include "spdk/env.h"
/**
* Opaque handle for a single IDXD channel.
*/
struct spdk_idxd_io_channel;
/**
* Opaque handle for a single IDXD device.
*/
struct spdk_idxd_device;
/**
* Configure an IDXD channel.
*
* \param chan IDXD channel to be configured.
* \return 0 on success, negative errno on failure.
*/
int spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan);
/**
* Reconfigures this channel based on how many current channels there are.
*
* \param chan IDXD channel to be reconfigured.
* \param num_channels total number of channels in use.
* \return 0 on success, negative errno on failure.
*/
int spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan, uint32_t num_channels);
/**
* Signature for callback function invoked when a request is completed.
*
* \param arg User-specified opaque value corresponding to cb_arg from the
* request submission.
* \param status 0 on success, negative errno on failure.
*/
typedef void (*spdk_idxd_req_cb)(void *arg, int status);
/**
* Callback for spdk_idxd_probe() enumeration.
*
* \param cb_ctx User-specified opaque value corresponding to cb_ctx from spdk_idxd_probe().
* \param pci_dev PCI device that is being probed.
*
* \return true to attach to this device.
*/
typedef bool (*spdk_idxd_probe_cb)(void *cb_ctx, struct spdk_pci_device *pci_dev);
/**
* Callback for spdk_idxd_probe() to report a device that has been attached to
* the userspace IDXD driver.
*
* \param cb_ctx User-specified opaque value corresponding to cb_ctx from spdk_idxd_probe().
* \param pci_dev PCI device that was attached to the driver.
* \param idxd IDXD device that was attached to the driver.
*/
typedef void (*spdk_idxd_attach_cb)(void *cb_ctx, struct spdk_pci_device *pci_dev,
struct spdk_idxd_device *idxd);
/**
* Enumerate the IDXD devices attached to the system and attach the userspace
* IDXD driver to them if desired.
*
* If called more than once, only devices that are not already attached to the
* SPDK IDXD driver will be reported.
*
* To stop using the device and release its associated resources, call
* spdk_idxd_detach() with the spdk_idxd_device instance provided to attach_cb.
*
* \param cb_ctx Opaque value which will be passed back in cb_ctx parameter of
* the callbacks.
* \param probe_cb will be called once per IDXD device found in the system.
* \param attach_cb will be called for devices for which probe_cb returned true
* once the IDXD controller has been attached to the userspace driver.
*
* \return 0 on success, -1 on failure.
*/
int spdk_idxd_probe(void *cb_ctx, spdk_idxd_probe_cb probe_cb, spdk_idxd_attach_cb attach_cb);
/**
* Detach specified device returned by spdk_idxd_probe() from the IDXD driver.
*
* \param idxd IDXD device to detach from the driver.
*/
void spdk_idxd_detach(struct spdk_idxd_device *idxd);
/**
* Sets the IDXD configuration.
*
* \param config_number the configuration number for a valid IDXD config.
*/
void spdk_idxd_set_config(int config_number);
/**
* Build and submit a DMA engine memory copy request.
*
* This function will build the copy descriptor and then immediately submit
* by writing to the proper device portal.
*
* \param chan IDXD channel to submit request.
* \param dst Destination virtual address.
* \param src Source virtual address.
* \param nbytes Number of bytes to copy.
* \param cb_fn Callback function which will be called when the request is complete.
* \param cb_arg Opaque value which will be passed back as the arg parameter in
* the completion callback.
*
* \return 0 on success, negative errno on failure.
*/
int spdk_idxd_submit_copy(struct spdk_idxd_io_channel *chan,
void *dst, const void *src, uint64_t nbytes,
spdk_idxd_req_cb cb_fn, void *cb_arg);
/**
* Build and submit a DMA engine memory fill request.
*
* This function will build the fill descriptor and then immediately submit
* by writing to the proper device portal.
*
* \param chan IDXD channel to submit request.
* \param dst Destination virtual address.
* \param fill_pattern Repeating eight-byte pattern to use for memory fill.
* \param nbytes Number of bytes to fill.
* \param cb_fn Callback function which will be called when the request is complete.
* \param cb_arg Opaque value which will be passed back as the cb_arg parameter
* in the completion callback.
*
* \return 0 on success, negative errno on failure.
*/
int spdk_idxd_submit_fill(struct spdk_idxd_io_channel *chan,
void *dst, uint64_t fill_pattern, uint64_t nbytes,
spdk_idxd_req_cb cb_fn, void *cb_arg);
/**
* Check for completed requests on an IDXD channel.
*
* \param chan IDXD channel to check for completions.
*/
void spdk_idxd_process_events(struct spdk_idxd_io_channel *chan);
/**
* Returns an IDXD channel for a given IDXD device.
*
* \param idxd IDXD device to get a channel for.
*
* \return pointer to an IDXD channel.
*/
struct spdk_idxd_io_channel *spdk_idxd_get_channel(struct spdk_idxd_device *idxd);
/**
* Free an IDXD channel.
*
* \param chan IDXD channel to free.
*/
void spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan);
#ifdef __cplusplus
}
#endif
#endif
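
A hedged usage sketch of the public API above (added for illustration;
the attach-everything probe policy, busy-wait polling, and every name
other than the spdk_idxd_* calls are assumptions, not part of the diff):

#include "spdk/idxd.h"

static bool
probe_cb(void *cb_ctx, struct spdk_pci_device *pci_dev)
{
	return true; /* claim every IDXD device found */
}

static void
attach_cb(void *cb_ctx, struct spdk_pci_device *pci_dev,
	  struct spdk_idxd_device *idxd)
{
	*(struct spdk_idxd_device **)cb_ctx = idxd;
}

static void
copy_done(void *arg, int status)
{
	*(int *)arg = status; /* 0 on success, negative errno on failure */
}

static int
copy_example(void *dst, const void *src, uint64_t nbytes)
{
	struct spdk_idxd_device *idxd = NULL;
	struct spdk_idxd_io_channel *chan;
	int done = 1;

	spdk_idxd_set_config(0);
	if (spdk_idxd_probe(&idxd, probe_cb, attach_cb) != 0 || idxd == NULL) {
		return -1;
	}
	chan = spdk_idxd_get_channel(idxd);
	if (chan != NULL && spdk_idxd_configure_chan(chan) == 0) {
		if (spdk_idxd_submit_copy(chan, dst, src, nbytes,
					  copy_done, &done) == 0) {
			while (done == 1) {
				spdk_idxd_process_events(chan); /* poll */
			}
		}
		spdk_idxd_reconfigure_chan(chan, 0); /* 0 channels frees the rings */
	}
	spdk_idxd_put_channel(chan);
	spdk_idxd_detach(idxd);
	return done;
}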

include/spdk/pci_ids.h

@@ -62,6 +62,8 @@ extern "C" {
*/
#define SPDK_PCI_CLASS_NVME 0x010802
#define PCI_DEVICE_ID_INTEL_IDXD 0x0b25
#define PCI_DEVICE_ID_INTEL_IOAT_SNB0 0x3c20
#define PCI_DEVICE_ID_INTEL_IOAT_SNB1 0x3c21
#define PCI_DEVICE_ID_INTEL_IOAT_SNB2 0x3c22

include/spdk_internal/idxd.h (new file)

@@ -0,0 +1,74 @@
/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __IDXD_INTERNAL_H__
#define __IDXD_INTERNAL_H__
#include "spdk/stdinc.h"
#include "spdk/idxd.h"
#include "spdk/queue.h"
#include "spdk/mmio.h"
#include "spdk/bit_array.h"
#ifdef __cplusplus
extern "C" {
#endif
#define IDXD_MAX_CONFIG_NUM 1
enum dsa_opcode {
IDXD_OPCODE_NOOP = 0,
IDXD_OPCODE_BATCH = 1,
IDXD_OPCODE_DRAIN = 2,
IDXD_OPCODE_MEMMOVE = 3,
IDXD_OPCODE_MEMFILL = 4,
IDXD_OPCODE_COMPARE = 5,
IDXD_OPCODE_COMPVAL = 6,
IDXD_OPCODE_CR_DELTA = 7,
IDXD_OPCODE_AP_DELTA = 8,
IDXD_OPCODE_DUALCAST = 9,
IDXD_OPCODE_CRCGEN = 16,
IDXD_OPCODE_COPY_CRC = 17,
IDXD_OPCODE_DIF_CHECK = 18,
IDXD_OPCODE_DIF_INS = 19,
IDXD_OPCODE_DIF_STRP = 20,
IDXD_OPCODE_DIF_UPDT = 21,
IDXD_OPCODE_CFLUSH = 32,
};
#ifdef __cplusplus
}
#endif
#endif /* __IDXD_INTERNAL_H__ */

lib/Makefile

@@ -43,7 +43,7 @@ DIRS-y += nbd ftl
endif
DIRS-$(CONFIG_OCF) += env_ocf
DIRS-$(CONFIG_IDXD) += idxd
DIRS-$(CONFIG_VHOST) += vhost
DIRS-$(CONFIG_VIRTIO) += virtio
DIRS-$(CONFIG_REDUCE) += reduce

lib/env_dpdk/Makefile

@@ -40,7 +40,7 @@ SO_SUFFIX := $(SO_VER).$(SO_MINOR)
CFLAGS += $(ENV_CFLAGS)
C_SRCS = env.c memory.c pci.c init.c threads.c
C_SRCS += pci_nvme.c pci_ioat.c pci_virtio.c pci_vmd.c
C_SRCS += pci_nvme.c pci_ioat.c pci_virtio.c pci_vmd.c pci_idxd.c
LIBNAME = env_dpdk
SPDK_MAP_FILE = $(abspath $(CURDIR)/spdk_env_dpdk.map)

lib/env_dpdk/pci_idxd.c (new file)

@@ -0,0 +1,64 @@
/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "env_internal.h"
#include "spdk/pci_ids.h"
#define SPDK_IDXD_PCI_DEVICE(DEVICE_ID) RTE_PCI_DEVICE(SPDK_PCI_VID_INTEL, DEVICE_ID)
static struct rte_pci_id idxd_driver_id[] = {
{SPDK_IDXD_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IDXD)},
{ .vendor_id = 0, /* sentinel */ },
};
static struct spdk_pci_driver g_idxd_pci_drv = {
.driver = {
.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
.id_table = idxd_driver_id,
.probe = pci_device_init,
.remove = pci_device_fini,
.driver.name = "spdk_idxd",
},
.cb_fn = NULL,
.cb_arg = NULL,
.is_registered = false,
};
struct spdk_pci_driver *
spdk_pci_idxd_get_driver(void)
{
return &g_idxd_pci_drv;
}
SPDK_PMD_REGISTER_PCI(g_idxd_pci_drv);

lib/idxd/Makefile (new file)

@@ -0,0 +1,40 @@
#
# BSD LICENSE
#
# Copyright (c) Intel Corporation.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
C_SRCS = idxd.c
LIBNAME = idxd
include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk

lib/idxd/idxd.c (new file)

@@ -0,0 +1,756 @@
/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "spdk/stdinc.h"
#include "spdk/env.h"
#include "spdk/util.h"
#include "spdk/memory.h"
#include "spdk_internal/log.h"
#include "spdk_internal/idxd.h"
#include "idxd.h"
pthread_mutex_t g_driver_lock = PTHREAD_MUTEX_INITIALIZER;
/*
* g_dev_cfg gives us 2 pre-set configurations of DSA to choose from
* via RPC.
*/
struct device_config *g_dev_cfg = NULL;
/*
* Pre-built configurations. Variations depend on various factors
* including how many different types of target latency profiles there
* are, how many different QOS requirements there might be, etc.
*/
struct device_config g_dev_cfg0 = {
.config_num = 0,
.num_groups = 4,
.num_wqs_per_group = 1,
.num_engines_per_group = 1,
.total_wqs = 4,
.total_engines = 4,
};
struct device_config g_dev_cfg1 = {
.config_num = 1,
.num_groups = 2,
.num_wqs_per_group = 2,
.num_engines_per_group = 2,
.total_wqs = 4,
.total_engines = 4,
};
static uint32_t
_idxd_read_4(struct spdk_idxd_device *idxd, uint32_t offset)
{
return spdk_mmio_read_4((uint32_t *)(idxd->reg_base + offset));
}
static void
_idxd_write_4(struct spdk_idxd_device *idxd, uint32_t offset, uint32_t value)
{
spdk_mmio_write_4((uint32_t *)(idxd->reg_base + offset), value);
}
static uint64_t
_idxd_read_8(struct spdk_idxd_device *idxd, uint32_t offset)
{
return spdk_mmio_read_8((uint64_t *)(idxd->reg_base + offset));
}
static void
_idxd_write_8(struct spdk_idxd_device *idxd, uint32_t offset, uint64_t value)
{
spdk_mmio_write_8((uint64_t *)(idxd->reg_base + offset), value);
}
struct spdk_idxd_io_channel *
spdk_idxd_get_channel(struct spdk_idxd_device *idxd)
{
struct spdk_idxd_io_channel *chan;
chan = calloc(1, sizeof(struct spdk_idxd_io_channel));
if (chan == NULL) {
SPDK_ERRLOG("Failed to allocate idxd chan\n");
return NULL;
}
chan->idxd = idxd;
return chan;
}
void
spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan)
{
free(chan);
}
int
spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan)
{
uint32_t num_ring_slots;
chan->idxd->wq_id++;
if (chan->idxd->wq_id == g_dev_cfg->total_wqs) {
chan->idxd->wq_id = 0;
}
num_ring_slots = chan->idxd->queues[chan->idxd->wq_id].wqcfg.wq_size;
chan->ring_ctrl.ring_slots = spdk_bit_array_create(num_ring_slots);
if (chan->ring_ctrl.ring_slots == NULL) {
SPDK_ERRLOG("Failed to allocate bit array for ring\n");
return -ENOMEM;
}
/*
* max ring slots can change as channels come and go but we
* start off getting all of the slots for this work queue.
*/
chan->ring_ctrl.max_ring_slots = num_ring_slots;
/* Store the original size of the ring. */
chan->ring_ctrl.ring_size = num_ring_slots;
chan->ring_ctrl.data_desc = spdk_zmalloc(num_ring_slots * sizeof(struct idxd_hw_desc),
0x40, NULL,
SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
if (chan->ring_ctrl.data_desc == NULL) {
SPDK_ERRLOG("Failed to allocate descriptor memory\n");
spdk_bit_array_free(&chan->ring_ctrl.ring_slots);
return -ENOMEM;
}
chan->ring_ctrl.completions = spdk_zmalloc(num_ring_slots * sizeof(struct idxd_comp),
0x40, NULL,
SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
if (chan->ring_ctrl.completions == NULL) {
SPDK_ERRLOG("Failed to allocate completion memory\n");
spdk_bit_array_free(&chan->ring_ctrl.ring_slots);
spdk_free(chan->ring_ctrl.data_desc);
return -ENOMEM;
}
chan->ring_ctrl.portal = (char *)chan->idxd->portals + chan->idxd->wq_id * PORTAL_SIZE;
return 0;
}
static void
_idxd_drain(struct spdk_idxd_io_channel *chan)
{
uint32_t index;
int set = 0;
/*
* TODO this is a temp solution to drain until getting the drain cmd to work, this
* provides equivalent functionality but just doesn't use the device to do it.
*/
do {
spdk_idxd_process_events(chan);
set = 0;
for (index = 0; index < chan->ring_ctrl.max_ring_slots; index++) {
set |= spdk_bit_array_get(chan->ring_ctrl.ring_slots, index);
}
} while (set);
}
int
spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan, uint32_t num_channels)
{
uint32_t num_ring_slots;
int rc;
_idxd_drain(chan);
assert(spdk_bit_array_count_set(chan->ring_ctrl.ring_slots) == 0);
if (num_channels == 0) {
spdk_free(chan->ring_ctrl.completions);
spdk_free(chan->ring_ctrl.data_desc);
spdk_bit_array_free(&chan->ring_ctrl.ring_slots);
return 0;
}
num_ring_slots = chan->ring_ctrl.ring_size / num_channels;
/* re-allocate our descriptor ring for hw flow control. */
rc = spdk_bit_array_resize(&chan->ring_ctrl.ring_slots, num_ring_slots);
if (rc < 0) {
SPDK_ERRLOG("Unable to resize channel bit array\n");
return -ENOMEM;
}
chan->ring_ctrl.max_ring_slots = num_ring_slots;
return rc;
}
/* Called via RPC to select a pre-defined configuration. */
void
spdk_idxd_set_config(int config_num)
{
switch (config_num) {
case 0:
g_dev_cfg = &g_dev_cfg0;
break;
case 1:
g_dev_cfg = &g_dev_cfg1;
break;
default:
g_dev_cfg = &g_dev_cfg0;
SPDK_ERRLOG("Invalid config, using default\n");
break;
}
}
static int
idxd_unmap_pci_bar(struct spdk_idxd_device *idxd, int bar)
{
int rc = 0;
void *addr = NULL;
if (bar == IDXD_MMIO_BAR) {
addr = (void *)idxd->reg_base;
} else if (bar == IDXD_WQ_BAR) {
addr = (void *)idxd->portals;
}
if (addr) {
rc = spdk_pci_device_unmap_bar(idxd->device, bar, addr);
}
return rc;
}
static int
idxd_map_pci_bars(struct spdk_idxd_device *idxd)
{
int rc;
void *addr;
uint64_t phys_addr, size;
rc = spdk_pci_device_map_bar(idxd->device, IDXD_MMIO_BAR, &addr, &phys_addr, &size);
if (rc != 0 || addr == NULL) {
SPDK_ERRLOG("pci_device_map_range failed with error code %d\n", rc);
return -1;
}
idxd->reg_base = addr;
rc = spdk_pci_device_map_bar(idxd->device, IDXD_WQ_BAR, &addr, &phys_addr, &size);
if (rc != 0 || addr == NULL) {
SPDK_ERRLOG("pci_device_map_range failed with error code %d\n", rc);
rc = idxd_unmap_pci_bar(idxd, IDXD_MMIO_BAR);
if (rc) {
SPDK_ERRLOG("unable to unmap MMIO bar\n");
}
return -EINVAL;
}
idxd->portals = addr;
return 0;
}
/* Used for control commands, not for descriptor submission. */
static int
idxd_wait_cmd(struct spdk_idxd_device *idxd, int _timeout)
{
uint32_t timeout = _timeout;
union idxd_cmdsts_reg cmd_status = {};
cmd_status.raw = _idxd_read_4(idxd, IDXD_CMDSTS_OFFSET);
while (cmd_status.active && --timeout) {
usleep(1);
cmd_status.raw = _idxd_read_4(idxd, IDXD_CMDSTS_OFFSET);
}
/* Check for timeout */
if (timeout == 0 && cmd_status.active) {
SPDK_ERRLOG("Command timeout, waited %u\n", _timeout);
return -EBUSY;
}
/* Check for error */
if (cmd_status.err) {
SPDK_ERRLOG("Command status reg reports error 0x%x\n", cmd_status.err);
return -EINVAL;
}
return 0;
}
static int
idxd_reset_dev(struct spdk_idxd_device *idxd)
{
int rc;
_idxd_write_4(idxd, IDXD_CMD_OFFSET, IDXD_RESET_DEVICE << IDXD_CMD_SHIFT);
rc = idxd_wait_cmd(idxd, IDXD_REGISTER_TIMEOUT_US);
if (rc < 0) {
SPDK_ERRLOG("Error resetting device %u\n", rc);
}
return rc;
}
/*
* Build group config based on getting info from the device combined
* with the defined configuration. Once built, it is written to the
* device.
*/
static int
idxd_group_config(struct spdk_idxd_device *idxd)
{
int i;
uint64_t base_offset;
assert(g_dev_cfg->num_groups <= idxd->registers.groupcap.num_groups);
idxd->groups = calloc(idxd->registers.groupcap.num_groups, sizeof(struct idxd_group));
if (idxd->groups == NULL) {
SPDK_ERRLOG("Failed to allocate group memory\n");
return -ENOMEM;
}
assert(g_dev_cfg->total_engines <= idxd->registers.enginecap.num_engines);
for (i = 0; i < g_dev_cfg->total_engines; i++) {
idxd->groups[i % g_dev_cfg->num_groups].grpcfg.engines |= (1 << i);
}
assert(g_dev_cfg->total_wqs <= idxd->registers.wqcap.num_wqs);
for (i = 0; i < g_dev_cfg->total_wqs; i++) {
idxd->groups[i % g_dev_cfg->num_groups].grpcfg.wqs[0] |= (1 << i);
}
for (i = 0; i < g_dev_cfg->num_groups; i++) {
idxd->groups[i].idxd = idxd;
idxd->groups[i].id = i;
/* Divide BW tokens evenly */
idxd->groups[i].grpcfg.flags.tokens_allowed =
idxd->registers.groupcap.total_tokens / g_dev_cfg->num_groups;
}
/*
* Now write the group config to the device for all groups. We write
* to the max number of groups in order to 0 out the ones we didn't
* configure.
*/
for (i = 0 ; i < idxd->registers.groupcap.num_groups; i++) {
base_offset = idxd->grpcfg_offset + i * 64;
/* GRPWQCFG, work queues config */
_idxd_write_8(idxd, base_offset, idxd->groups[i].grpcfg.wqs[0]);
/* GRPENGCFG, engine config */
_idxd_write_8(idxd, base_offset + CFG_ENGINE_OFFSET, idxd->groups[i].grpcfg.engines);
/* GRPFLAGS, flags config */
_idxd_write_8(idxd, base_offset + CFG_FLAG_OFFSET, idxd->groups[i].grpcfg.flags.raw);
}
return 0;
}
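/*
 * Worked example (illustrative, not in the original source): with
 * g_dev_cfg0 (num_groups = 4, total_engines = 4, total_wqs = 4) the
 * modulo assignment above places engine i and WQ i into group i, and
 * each group is allowed total_tokens / 4 bandwidth tokens.
 */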
/*
* Build work queue (WQ) config based on getting info from the device combined
* with the defined configuration. Once built, it is written to the device.
*/
static int
idxd_wq_config(struct spdk_idxd_device *idxd)
{
int i, j;
struct idxd_wq *queue;
uint32_t wq_size = idxd->registers.wqcap.total_wq_size / g_dev_cfg->total_wqs;
SPDK_NOTICELOG("Total ring slot space 0x%x, 0x%x per work queue\n",
idxd->registers.wqcap.total_wq_size, wq_size);
assert(g_dev_cfg->total_wqs <= IDXD_MAX_QUEUES);
assert(g_dev_cfg->total_wqs <= idxd->registers.wqcap.num_wqs);
assert(LOG2_WQ_MAX_BATCH <= idxd->registers.gencap.max_batch_shift);
assert(LOG2_WQ_MAX_XFER <= idxd->registers.gencap.max_xfer_shift);
idxd->queues = calloc(1, idxd->registers.wqcap.num_wqs * sizeof(struct idxd_wq));
if (idxd->queues == NULL) {
SPDK_ERRLOG("Failed to allocate queue memory\n");
return -ENOMEM;
}
for (i = 0; i < g_dev_cfg->total_wqs; i++) {
queue = &idxd->queues[i];
queue->wqcfg.wq_size = wq_size;
queue->wqcfg.mode = WQ_MODE_DEDICATED;
queue->wqcfg.max_batch_shift = LOG2_WQ_MAX_BATCH;
queue->wqcfg.max_xfer_shift = LOG2_WQ_MAX_XFER;
queue->wqcfg.wq_state = WQ_ENABLED;
queue->wqcfg.priority = WQ_PRIORITY_1;
/* Not part of the config struct */
queue->idxd = idxd;
queue->group = &idxd->groups[i % g_dev_cfg->num_groups];
}
/*
* Now write the work queue config to the device for all wq space
*/
for (i = 0 ; i < idxd->registers.wqcap.num_wqs; i++) {
queue = &idxd->queues[i];
for (j = 0 ; j < WQCFG_NUM_DWORDS; j++) {
_idxd_write_4(idxd, idxd->wqcfg_offset + i * 32 + j * 4,
queue->wqcfg.raw[j]);
}
}
return 0;
}
static int
idxd_device_configure(struct spdk_idxd_device *idxd)
{
int i, rc = 0;
union idxd_offsets_register offsets_reg;
union idxd_genstatus_register genstatus_reg;
/*
* Map BAR0 and BAR2
*/
rc = idxd_map_pci_bars(idxd);
if (rc) {
return rc;
}
/*
* Reset the device
*/
rc = idxd_reset_dev(idxd);
if (rc) {
goto err_reset;
}
/*
* Read in config registers
*/
idxd->registers.version = _idxd_read_4(idxd, IDXD_VERSION_OFFSET);
idxd->registers.gencap.raw = _idxd_read_8(idxd, IDXD_GENCAP_OFFSET);
idxd->registers.wqcap.raw = _idxd_read_8(idxd, IDXD_WQCAP_OFFSET);
idxd->registers.groupcap.raw = _idxd_read_8(idxd, IDXD_GRPCAP_OFFSET);
idxd->registers.enginecap.raw = _idxd_read_8(idxd, IDXD_ENGCAP_OFFSET);
for (i = 0; i < IDXD_OPCAP_WORDS; i++) {
idxd->registers.opcap.raw[i] =
_idxd_read_8(idxd, i * sizeof(uint64_t) + IDXD_OPCAP_OFFSET);
}
offsets_reg.raw[0] = _idxd_read_8(idxd, IDXD_TABLE_OFFSET);
offsets_reg.raw[1] = _idxd_read_8(idxd, IDXD_TABLE_OFFSET + sizeof(uint64_t));
idxd->grpcfg_offset = offsets_reg.grpcfg * IDXD_TABLE_OFFSET_MULT;
idxd->wqcfg_offset = offsets_reg.wqcfg * IDXD_TABLE_OFFSET_MULT;
idxd->ims_offset = offsets_reg.ims * IDXD_TABLE_OFFSET_MULT;
idxd->msix_perm_offset = offsets_reg.msix_perm * IDXD_TABLE_OFFSET_MULT;
idxd->perfmon_offset = offsets_reg.perfmon * IDXD_TABLE_OFFSET_MULT;
/*
* Configure groups and work queues.
*/
rc = idxd_group_config(idxd);
if (rc) {
goto err_group_cfg;
}
rc = idxd_wq_config(idxd);
if (rc) {
goto err_wq_cfg;
}
/*
* Enable the device
*/
genstatus_reg.raw = _idxd_read_4(idxd, IDXD_GENSTATUS_OFFSET);
assert(genstatus_reg.state == IDXD_DEVICE_STATE_DISABLED);
_idxd_write_4(idxd, IDXD_CMD_OFFSET, IDXD_ENABLE_DEV << IDXD_CMD_SHIFT);
rc = idxd_wait_cmd(idxd, IDXD_REGISTER_TIMEOUT_US);
genstatus_reg.raw = _idxd_read_4(idxd, IDXD_GENSTATUS_OFFSET);
if ((rc < 0) || (genstatus_reg.state != IDXD_DEVICE_STATE_ENABLED)) {
rc = -EINVAL;
SPDK_ERRLOG("Error enabling device %u\n", rc);
goto err_device_enable;
}
genstatus_reg.raw = spdk_mmio_read_4((uint32_t *)(idxd->reg_base + IDXD_GENSTATUS_OFFSET));
assert(genstatus_reg.state == IDXD_DEVICE_STATE_ENABLED);
/*
* Enable the work queues that we've configured
*/
for (i = 0; i < g_dev_cfg->total_wqs; i++) {
_idxd_write_4(idxd, IDXD_CMD_OFFSET,
(IDXD_ENABLE_WQ << IDXD_CMD_SHIFT) | i);
rc = idxd_wait_cmd(idxd, IDXD_REGISTER_TIMEOUT_US);
if (rc < 0) {
SPDK_ERRLOG("Error enabling work queues 0x%x\n", rc);
goto err_wq_enable;
}
}
if ((rc == 0) && (genstatus_reg.state == IDXD_DEVICE_STATE_ENABLED)) {
SPDK_NOTICELOG("Device enabled, version 0x%x gencap: 0x%lx\n",
idxd->registers.version,
idxd->registers.gencap.raw);
}
return rc;
err_wq_enable:
err_device_enable:
free(idxd->queues);
err_wq_cfg:
free(idxd->groups);
err_group_cfg:
err_reset:
idxd_unmap_pci_bar(idxd, IDXD_MMIO_BAR);
idxd_unmap_pci_bar(idxd, IDXD_WQ_BAR);
return rc;
}
static void
idxd_device_destruct(struct spdk_idxd_device *idxd)
{
idxd_unmap_pci_bar(idxd, IDXD_MMIO_BAR);
idxd_unmap_pci_bar(idxd, IDXD_WQ_BAR);
free(idxd->groups);
free(idxd->queues);
free(idxd);
}
/* Caller must hold g_driver_lock */
static struct spdk_idxd_device *
idxd_attach(struct spdk_pci_device *device)
{
struct spdk_idxd_device *idxd;
uint32_t cmd_reg;
int rc;
idxd = calloc(1, sizeof(struct spdk_idxd_device));
if (idxd == NULL) {
SPDK_ERRLOG("Failed to allocate memory for idxd device.\n");
return NULL;
}
idxd->device = device;
/* Enable PCI busmaster. */
spdk_pci_device_cfg_read32(device, &cmd_reg, 4);
cmd_reg |= 0x4;
spdk_pci_device_cfg_write32(device, cmd_reg, 4);
rc = idxd_device_configure(idxd);
if (rc) {
goto err;
}
return idxd;
err:
idxd_device_destruct(idxd);
return NULL;
}
struct idxd_enum_ctx {
spdk_idxd_probe_cb probe_cb;
spdk_idxd_attach_cb attach_cb;
void *cb_ctx;
};
/* This function must only be called while holding g_driver_lock */
static int
idxd_enum_cb(void *ctx, struct spdk_pci_device *pci_dev)
{
struct idxd_enum_ctx *enum_ctx = ctx;
struct spdk_idxd_device *idxd;
if (enum_ctx->probe_cb(enum_ctx->cb_ctx, pci_dev)) {
idxd = idxd_attach(pci_dev);
if (idxd == NULL) {
SPDK_ERRLOG("idxd_attach() failed\n");
return -EINVAL;
}
enum_ctx->attach_cb(enum_ctx->cb_ctx, pci_dev, idxd);
}
return 0;
}
int
spdk_idxd_probe(void *cb_ctx, spdk_idxd_probe_cb probe_cb, spdk_idxd_attach_cb attach_cb)
{
int rc;
struct idxd_enum_ctx enum_ctx;
enum_ctx.probe_cb = probe_cb;
enum_ctx.attach_cb = attach_cb;
enum_ctx.cb_ctx = cb_ctx;
pthread_mutex_lock(&g_driver_lock);
rc = spdk_pci_enumerate(spdk_pci_idxd_get_driver(), idxd_enum_cb, &enum_ctx);
pthread_mutex_unlock(&g_driver_lock);
return rc;
}
void
spdk_idxd_detach(struct spdk_idxd_device *idxd)
{
idxd_device_destruct(idxd);
}
int
spdk_idxd_submit_copy(struct spdk_idxd_io_channel *chan, void *dst, const void *src,
uint64_t nbytes,
spdk_idxd_req_cb cb_fn, void *cb_arg)
{
uint32_t index;
struct idxd_hw_desc *desc;
struct idxd_comp *comp;
index = spdk_bit_array_find_first_clear(chan->ring_ctrl.ring_slots, 0);
if (index == UINT32_MAX) {
/* ran out of ring slots */
return -EBUSY;
}
spdk_bit_array_set(chan->ring_ctrl.ring_slots, index);
desc = &chan->ring_ctrl.data_desc[index];
comp = &chan->ring_ctrl.completions[index];
desc->opcode = IDXD_OPCODE_MEMMOVE;
desc->flags = IDXD_FLAG_COMPLETION_ADDR_VALID | IDXD_FLAG_REQUEST_COMPLETION;
desc->completion_addr = (uintptr_t)&comp->hw;
desc->src_addr = (uintptr_t)src;
desc->dst_addr = (uintptr_t)dst;
desc->xfer_size = nbytes;
comp->cb_arg = (uint64_t)cb_arg;
comp->cb_fn = cb_fn;
movdir64b((uint64_t *)chan->ring_ctrl.portal, desc);
return 0;
}
int
spdk_idxd_submit_fill(struct spdk_idxd_io_channel *chan, void *dst, uint64_t fill_pattern,
uint64_t nbytes,
spdk_idxd_req_cb cb_fn, void *cb_arg)
{
uint32_t index;
struct idxd_hw_desc *desc;
struct idxd_comp *comp;
index = spdk_bit_array_find_first_clear(chan->ring_ctrl.ring_slots, 0);
if (index == UINT32_MAX) {
/* ran out of ring slots */
return -EBUSY;
}
spdk_bit_array_set(chan->ring_ctrl.ring_slots, index);
desc = &chan->ring_ctrl.data_desc[index];
comp = &chan->ring_ctrl.completions[index];
desc->opcode = IDXD_OPCODE_MEMFILL;
desc->flags = IDXD_FLAG_COMPLETION_ADDR_VALID | IDXD_FLAG_REQUEST_COMPLETION;
desc->completion_addr = (uintptr_t)&comp->hw;
desc->pattern = fill_pattern;
desc->dst_addr = (uintptr_t)dst;
desc->xfer_size = nbytes;
comp->cb_arg = (uint64_t)cb_arg;
comp->cb_fn = cb_fn;
movdir64b((uint64_t *)chan->ring_ctrl.portal, desc);
return 0;
}
static void
_dump_error_reg(struct spdk_idxd_io_channel *chan)
{
uint64_t sw_error_0;
uint16_t i;
sw_error_0 = _idxd_read_8(chan->idxd, IDXD_SWERR_OFFSET);
SPDK_NOTICELOG("SW Error bits set:");
for (i = 0; i < CHAR_BIT; i++) {
if ((1ULL << i) & sw_error_0) {
SPDK_NOTICELOG(" %d\n", i);
}
}
SPDK_NOTICELOG("SW Error error code: %#x\n", (uint8_t)(sw_error_0 >> 8));
SPDK_NOTICELOG("SW Error WQ index: %u\n", (uint8_t)(sw_error_0 >> 16));
SPDK_NOTICELOG("SW Error Operation: %u\n", (uint8_t)(sw_error_0 >> 32));
}
/*
* TODO: Experiment with different methods of reaping completions for performance
* once we have real silicon.
*/
void
spdk_idxd_process_events(struct spdk_idxd_io_channel *chan)
{
uint16_t index;
struct idxd_comp *comp;
uint64_t sw_error_0;
int status = 0;
for (index = 0; index < chan->ring_ctrl.max_ring_slots; index++) {
if (spdk_bit_array_get(chan->ring_ctrl.ring_slots, index)) {
comp = &chan->ring_ctrl.completions[index];
if (comp->hw.status == 1) {
sw_error_0 = _idxd_read_8(chan->idxd, IDXD_SWERR_OFFSET);
if (sw_error_0 & 0x1) {
_dump_error_reg(chan);
status = -EINVAL;
}
comp->cb_fn((void *)comp->cb_arg, status);
comp->hw.status = status = 0;
spdk_bit_array_clear(chan->ring_ctrl.ring_slots, index);
}
}
}
}
SPDK_LOG_REGISTER_COMPONENT("idxd", SPDK_LOG_IDXD)

lib/idxd/idxd.h (new file)

@@ -0,0 +1,165 @@
/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __IDXD_H__
#define __IDXD_H__
#include "spdk/stdinc.h"
#include "spdk/idxd.h"
#include "spdk/queue.h"
#include "spdk/mmio.h"
#include "spdk/bit_array.h"
#include "idxd_spec.h"
#ifdef __cplusplus
extern "C" {
#endif
/* TODO: get the gcc intrinsic to work. */
#define nop() asm volatile ("nop")
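/*
 * Explanatory comment (added): MOVDIR64B reads 64 bytes from src and
 * writes them to the 64-byte aligned dst as a single atomic 64-byte
 * store, which is what lets a whole descriptor land in the WQ portal
 * in one shot. It is emitted as raw bytes (66 0F 38 F8 /r) because
 * older assemblers do not know the mnemonic; modrm 0x02 selects dst in
 * rax and src in [rdx], matching the "a"/"d" constraints below.
 */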
static inline void movdir64b(void *dst, const void *src)
{
asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
: "=m"(*(char *)dst)
: "d"(src), "a"(dst));
}
#define IDXD_REGISTER_TIMEOUT_US 50
/* TODO: make some of these RPC selectable */
#define WQ_MODE_DEDICATED 1
#define LOG2_WQ_MAX_BATCH 8 /* 2^8 = 256 */
#define LOG2_WQ_MAX_XFER 30 /* 2^30 = 1073741824 */
#define WQCFG_NUM_DWORDS 8
#define WQ_PRIORITY_1 1
#define IDXD_MAX_QUEUES 64
struct device_config {
uint8_t config_num;
uint8_t num_wqs_per_group;
uint8_t num_engines_per_group;
uint8_t num_groups;
uint16_t total_wqs;
uint16_t total_engines;
};
struct idxd_ring_control {
void *portal;
uint16_t ring_size;
/*
* Rings for this channel, one for descriptors and one
* for completions, share the same index. Future will
* include a separate ring for batch descriptors once
* the batch interface is completed.
*/
struct idxd_hw_desc *data_desc;
struct idxd_comp *completions;
/*
* We use one bit array to track ring slots for both
* data_desc and completions.
*/
struct spdk_bit_array *ring_slots;
uint32_t max_ring_slots;
};
struct spdk_idxd_io_channel {
struct spdk_idxd_device *idxd;
struct idxd_ring_control ring_ctrl;
};
struct pci_dev_id {
int vendor_id;
int device_id;
};
struct idxd_group {
struct spdk_idxd_device *idxd;
struct idxd_grpcfg grpcfg;
struct pci_dev_id pcidev;
int num_engines;
int num_wqs;
int id;
uint8_t tokens_allowed;
bool use_token_limit;
uint8_t tokens_reserved;
int tc_a;
int tc_b;
};
/*
* This struct wraps the hardware completion record which is 32 bytes in
* size and must be 32 byte aligned.
*/
struct idxd_comp {
struct idxd_hw_comp_record hw;
uint64_t cb_arg;
spdk_idxd_req_cb cb_fn;
uint64_t pad1;
uint64_t pad2;
} __attribute__((packed));
SPDK_STATIC_ASSERT(sizeof(struct idxd_comp) == 64, "size mismatch");
struct idxd_wq {
struct spdk_idxd_device *idxd;
struct idxd_group *group;
union idxd_wqcfg wqcfg;
};
struct spdk_idxd_device {
struct spdk_pci_device *device;
void *reg_base;
void *portals;
int socket_id;
int wq_id;
struct idxd_registers registers;
uint32_t ims_offset;
uint32_t msix_perm_offset;
uint32_t wqcfg_offset;
uint32_t grpcfg_offset;
uint32_t perfmon_offset;
struct idxd_group *groups;
struct idxd_wq *queues;
};
#ifdef __cplusplus
}
#endif
#endif /* __IDXD_H__ */

lib/idxd/idxd_spec.h (new file)

@@ -0,0 +1,501 @@
/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* \file
* IDXD specification definitions
*/
#ifndef SPDK_IDXD_SPEC_H
#define SPDK_IDXD_SPEC_H
#include "spdk/stdinc.h"
#include "spdk/assert.h"
#ifdef __cplusplus
extern "C" {
#endif
#define IDXD_MMIO_BAR 0
#define IDXD_WQ_BAR 2
#define PORTAL_SIZE (4096 * 4)
#define CFG_ENGINE_OFFSET 0x20
#define CFG_FLAG_OFFSET 0x28
#define IDXD_CMD_SHIFT 20
#define IDXD_VERSION_OFFSET 0x00
#define IDXD_GENCAP_OFFSET 0x10
#define IDXD_WQCAP_OFFSET 0x20
#define IDXD_GRPCAP_OFFSET 0x30
#define IDXD_OPCAP_OFFSET 0x40
#define IDXD_ENGCAP_OFFSET 0x38
#define IDXD_TABLE_OFFSET 0x60
#define IDXD_GENCFG_OFFSET 0x80
#define IDXD_GENCTRL_OFFSET 0x88
#define IDXD_GENSTATUS_OFFSET 0x90
#define IDXD_INTCAUSE_OFFSET 0x98
#define IDXD_CMD_OFFSET 0xa0
#define IDXD_CMDSTS_OFFSET 0xa8
#define IDXD_SWERR_OFFSET 0xc0
#define IDXD_TABLE_OFFSET_MULT 0x100
#define IDXD_OPCAP_WORDS 0x4
#define IDXD_FLAG_FENCE (1 << 0)
#define IDXD_FLAG_COMPLETION_ADDR_VALID (1 << 2)
#define IDXD_FLAG_REQUEST_COMPLETION (1 << 3)
#define IDXD_FLAG_CACHE_CONTROL (1 << 8)
/*
* IDXD is a family of devices, DSA is the only currently
* supported one.
*/
enum dsa_completion_status {
IDXD_COMP_NONE = 0,
IDXD_COMP_SUCCESS = 1,
IDXD_COMP_SUCCESS_PRED = 2,
IDXD_COMP_PAGE_FAULT_NOBOF = 3,
IDXD_COMP_PAGE_FAULT_IR = 4,
IDXD_COMP_BATCH_FAIL = 5,
IDXD_COMP_BATCH_PAGE_FAULT = 6,
IDXD_COMP_DR_OFFSET_NOINC = 7,
IDXD_COMP_DR_OFFSET_ERANGE = 8,
IDXD_COMP_DIF_ERR = 9,
IDXD_COMP_BAD_OPCODE = 16,
IDXD_COMP_INVALID_FLAGS = 17,
IDXD_COMP_NOZERO_RESERVE = 18,
IDXD_COMP_XFER_ERANGE = 19,
IDXD_COMP_DESC_CNT_ERANGE = 20,
IDXD_COMP_DR_ERANGE = 21,
IDXD_COMP_OVERLAP_BUFFERS = 22,
IDXD_COMP_DCAST_ERR = 23,
IDXD_COMP_DESCLIST_ALIGN = 24,
IDXD_COMP_INT_HANDLE_INVAL = 25,
IDXD_COMP_CRA_XLAT = 26,
IDXD_COMP_CRA_ALIGN = 27,
IDXD_COMP_ADDR_ALIGN = 28,
IDXD_COMP_PRIV_BAD = 29,
IDXD_COMP_TRAFFIC_CLASS_CONF = 30,
IDXD_COMP_PFAULT_RDBA = 31,
IDXD_COMP_HW_ERR1 = 32,
IDXD_COMP_HW_ERR_DRB = 33,
IDXD_COMP_TRANSLATION_FAIL = 34,
};
enum idxd_wq_state {
WQ_DISABLED = 0,
WQ_ENABLED = 1,
};
enum idxd_wq_flag {
WQ_FLAG_DEDICATED = 0,
WQ_FLAG_BOF = 1,
};
enum idxd_wq_type {
WQT_NONE = 0,
WQT_KERNEL = 1,
WQT_USER = 2,
WQT_MDEV = 3,
};
enum idxd_dev_state {
IDXD_DEVICE_STATE_DISABLED = 0,
IDXD_DEVICE_STATE_ENABLED = 1,
IDXD_DEVICE_STATE_DRAIN = 2,
IDXD_DEVICE_STATE_HALT = 3,
};
enum idxd_device_reset_type {
IDXD_DEVICE_RESET_SOFTWARE = 0,
IDXD_DEVICE_RESET_FLR = 1,
IDXD_DEVICE_RESET_WARM = 2,
IDXD_DEVICE_RESET_COLD = 3,
};
enum idxd_cmds {
IDXD_ENABLE_DEV = 1,
IDXD_DISABLE_DEV = 2,
IDXD_DRAIN_ALL = 3,
IDXD_ABORT_ALL = 4,
IDXD_RESET_DEVICE = 5,
IDXD_ENABLE_WQ = 6,
IDXD_DISABLE_WQ = 7,
IDXD_DRAIN_WQ = 8,
IDXD_ABORT_WQ = 9,
IDXD_RESET_WQ = 10,
};
enum idxd_cmdsts_err {
IDXD_CMDSTS_SUCCESS = 0,
IDXD_CMDSTS_INVAL_CMD = 1,
IDXD_CMDSTS_INVAL_WQIDX = 2,
IDXD_CMDSTS_HW_ERR = 3,
IDXD_CMDSTS_ERR_DEV_ENABLED = 16,
IDXD_CMDSTS_ERR_CONFIG = 17,
IDXD_CMDSTS_ERR_BUSMASTER_EN = 18,
IDXD_CMDSTS_ERR_PASID_INVAL = 19,
IDXD_CMDSTS_ERR_WQ_SIZE_ERANGE = 20,
IDXD_CMDSTS_ERR_GRP_CONFIG = 21,
IDXD_CMDSTS_ERR_GRP_CONFIG2 = 22,
IDXD_CMDSTS_ERR_GRP_CONFIG3 = 23,
IDXD_CMDSTS_ERR_GRP_CONFIG4 = 24,
IDXD_CMDSTS_ERR_DEV_NOTEN = 32,
IDXD_CMDSTS_ERR_WQ_ENABLED = 33,
IDXD_CMDSTS_ERR_WQ_SIZE = 34,
IDXD_CMDSTS_ERR_WQ_PRIOR = 35,
IDXD_CMDSTS_ERR_WQ_MODE = 36,
IDXD_CMDSTS_ERR_BOF_EN = 37,
IDXD_CMDSTS_ERR_PASID_EN = 38,
IDXD_CMDSTS_ERR_MAX_BATCH_SIZE = 39,
IDXD_CMDSTS_ERR_MAX_XFER_SIZE = 40,
IDXD_CMDSTS_ERR_DIS_DEV_EN = 49,
IDXD_CMDSTS_ERR_DEV_NOT_EN = 50,
IDXD_CMDSTS_ERR_INVAL_INT_IDX = 65,
IDXD_CMDSTS_ERR_NO_HANDLE = 66,
};
enum idxd_wq_hw_state {
IDXD_WQ_DEV_DISABLED = 0,
IDXD_WQ_DEV_ENABLED = 1,
IDXD_WQ_DEV_BUSY = 2,
};
struct idxd_hw_desc {
uint32_t pasid: 20;
uint32_t rsvd: 11;
uint32_t priv: 1;
uint32_t flags: 24;
uint32_t opcode: 8;
uint64_t completion_addr;
union {
uint64_t src_addr;
uint64_t readback_addr;
uint64_t pattern;
uint64_t desc_list_addr;
};
union {
uint64_t dst_addr;
uint64_t readback_addr2;
uint64_t src2_addr;
uint64_t comp_pattern;
};
union {
uint32_t xfer_size;
uint32_t desc_count;
};
uint16_t int_handle;
uint16_t rsvd1;
union {
uint8_t expected_res;
struct delta {
uint64_t addr;
uint32_t max_size;
} delta;
uint32_t delta_rec_size;
uint64_t dest2;
struct crc {
uint32_t seed;
uint32_t rsvd;
uint64_t addr;
} crc;
struct dif_chk {
uint8_t src_flags;
uint8_t rsvd1;
uint8_t flags;
uint8_t rsvd2[5];
uint32_t ref_tag_seed;
uint16_t app_tag_mask;
uint16_t app_tag_seed;
} dif_chk;
struct dif_ins {
uint8_t rsvd1;
uint8_t dest_flag;
uint8_t flags;
uint8_t rsvd2[13];
uint32_t ref_tag_seed;
uint16_t app_tag_mask;
uint16_t app_tag_seed;
} dif_ins;
struct dif_upd {
uint8_t src_flags;
uint8_t dest_flags;
uint8_t flags;
uint8_t rsvd[5];
uint32_t src_ref_tag_seed;
uint16_t src_app_tag_mask;
uint16_t src_app_tag_seed;
uint32_t dest_ref_tag_seed;
uint16_t dest_app_tag_mask;
uint16_t dest_app_tag_seed;
} dif_upd;
uint8_t op_specific[24];
};
} __attribute__((packed));
SPDK_STATIC_ASSERT(sizeof(struct idxd_hw_desc) == 64, "size mismatch");
struct idxd_hw_comp_record {
volatile uint8_t status;
union {
uint8_t result;
uint8_t dif_status;
};
uint16_t rsvd;
uint32_t bytes_completed;
uint64_t fault_addr;
union {
uint16_t delta_rec_size;
uint16_t crc_val;
struct {
uint32_t dif_chk_ref_tag;
uint16_t dif_chk_app_tag_mask;
uint16_t dif_chk_app_tag;
};
struct dif_ins_comp {
uint64_t rsvd;
uint32_t ref_tag;
uint16_t app_tag_mask;
uint16_t app_tag;
} dif_ins_comp;
struct dif_upd_comp {
uint32_t src_ref_tag;
uint16_t src_app_tag_mask;
uint16_t src_app_tag;
uint32_t dest_ref_tag;
uint16_t dest_app_tag_mask;
uint16_t dest_app_tag;
} dif_upd_comp;
uint8_t op_specific[16];
};
} __attribute__((packed));
SPDK_STATIC_ASSERT(sizeof(struct idxd_hw_comp_record) == 32, "size mismatch");
union idxd_gencap_register {
struct {
uint64_t block_on_fault: 1;
uint64_t overlap_copy: 1;
uint64_t cache_control_mem: 1;
uint64_t cache_control_cache: 1;
uint64_t rsvd: 3;
uint64_t int_handle_req: 1;
uint64_t dest_readback: 1;
uint64_t drain_readback: 1;
uint64_t rsvd2: 6;
uint64_t max_xfer_shift: 5;
uint64_t max_batch_shift: 4;
uint64_t max_ims_mult: 6;
uint64_t config_en: 1;
uint64_t max_descs_per_engine: 8;
uint64_t rsvd3: 24;
} __attribute__((packed));
uint64_t raw;
};
SPDK_STATIC_ASSERT(sizeof(union idxd_gencap_register) == 8, "size mismatch");
union idxd_wqcap_register {
struct {
uint64_t total_wq_size: 16;
uint64_t num_wqs: 8;
uint64_t rsvd: 24;
uint64_t shared_mode: 1;
uint64_t dedicated_mode: 1;
uint64_t rsvd2: 1;
uint64_t priority: 1;
uint64_t occupancy: 1;
uint64_t occupancy_int: 1;
uint64_t rsvd3: 10;
} __attribute__((packed));
uint64_t raw;
};
SPDK_STATIC_ASSERT(sizeof(union idxd_wqcap_register) == 8, "size mismatch");
union idxd_groupcap_register {
struct {
uint64_t num_groups: 8;
uint64_t total_tokens: 8;
uint64_t token_en: 1;
uint64_t token_limit: 1;
uint64_t rsvd: 46;
} __attribute__((packed));
uint64_t raw;
};
SPDK_STATIC_ASSERT(sizeof(union idxd_groupcap_register) == 8, "size mismatch");
union idxd_enginecap_register {
struct {
uint64_t num_engines: 8;
uint64_t rsvd: 56;
} __attribute__((packed));
uint64_t raw;
};
SPDK_STATIC_ASSERT(sizeof(union idxd_enginecap_register) == 8, "size mismatch");
struct idxd_opcap_register {
uint64_t raw[4];
};
SPDK_STATIC_ASSERT(sizeof(struct idxd_opcap_register) == 32, "size mismatch");
struct idxd_registers {
uint32_t version;
union idxd_gencap_register gencap;
union idxd_wqcap_register wqcap;
union idxd_groupcap_register groupcap;
union idxd_enginecap_register enginecap;
struct idxd_opcap_register opcap;
};
SPDK_STATIC_ASSERT(sizeof(struct idxd_registers) == 72, "size mismatch");
union idxd_offsets_register {
struct {
uint64_t grpcfg: 16;
uint64_t wqcfg: 16;
uint64_t msix_perm: 16;
uint64_t ims: 16;
uint64_t perfmon: 16;
uint64_t rsvd: 48;
} __attribute__((packed));
uint64_t raw[2];
};
SPDK_STATIC_ASSERT(sizeof(union idxd_offsets_register) == 16, "size mismatch");
union idxd_genstatus_register {
struct {
uint32_t state: 2;
uint32_t reset_type: 2;
uint32_t rsvd: 28;
} __attribute__((packed));
uint32_t raw;
};
SPDK_STATIC_ASSERT(sizeof(union idxd_genstatus_register) == 4, "size mismatch");
union idxd_cmdsts_reg {
struct {
uint8_t err;
uint16_t result;
uint8_t rsvd: 7;
uint8_t active: 1;
} __attribute__((packed));
uint32_t raw;
};
SPDK_STATIC_ASSERT(sizeof(union idxd_cmdsts_reg) == 4, "size mismatch");
union idxd_swerr_register {
struct {
uint64_t valid: 1;
uint64_t overflow: 1;
uint64_t desc_valid: 1;
uint64_t wq_idx_valid: 1;
uint64_t batch: 1;
uint64_t fault_rw: 1;
uint64_t priv: 1;
uint64_t rsvd: 1;
uint64_t error: 8;
uint64_t wq_idx: 8;
uint64_t rsvd2: 8;
uint64_t operation: 8;
uint64_t pasid: 20;
uint64_t rsvd3: 4;
uint64_t batch_idx: 16;
uint64_t rsvd4: 16;
uint64_t invalid_flags: 32;
uint64_t fault_addr;
uint64_t rsvd5;
} __attribute__((packed));
uint64_t raw[4];
};
SPDK_STATIC_ASSERT(sizeof(union idxd_swerr_register) == 32, "size mismatch");
union idxd_group_flags {
struct {
uint32_t tc_a: 3;
uint32_t tc_b: 3;
uint32_t rsvd: 1;
uint32_t use_token_limit: 1;
uint32_t tokens_reserved: 8;
uint32_t rsvd2: 4;
uint32_t tokens_allowed: 8;
uint32_t rsvd3: 4;
} __attribute__((packed));
uint32_t raw;
};
SPDK_STATIC_ASSERT(sizeof(union idxd_group_flags) == 4, "size mismatch");
struct idxd_grpcfg {
uint64_t wqs[4];
uint64_t engines;
union idxd_group_flags flags;
};
SPDK_STATIC_ASSERT(sizeof(struct idxd_grpcfg) == 48, "size mismatch");
union idxd_wqcfg {
struct {
uint16_t wq_size;
uint16_t rsvd;
uint16_t wq_thresh;
uint16_t rsvd1;
uint32_t mode: 1;
uint32_t bof: 1;
uint32_t rsvd2: 2;
uint32_t priority: 4;
uint32_t pasid: 20;
uint32_t pasid_en: 1;
uint32_t priv: 1;
uint32_t rsvd3: 2;
uint32_t max_xfer_shift: 5;
uint32_t max_batch_shift: 4;
uint32_t rsvd4: 23;
uint16_t occupancy_inth;
uint16_t occupancy_table_sel: 1;
uint16_t rsvd5: 15;
uint16_t occupancy_limit;
uint16_t occupancy_int_en: 1;
uint16_t rsvd6: 15;
uint16_t occupancy;
uint16_t occupancy_int: 1;
uint16_t rsvd7: 12;
uint16_t mode_support: 1;
uint16_t wq_state: 2;
uint32_t rsvd8;
} __attribute__((packed));
uint32_t raw[8];
};
SPDK_STATIC_ASSERT(sizeof(union idxd_wqcfg) == 32, "size mismatch");
#ifdef __cplusplus
}
#endif
#endif /* SPDK_IDXD_SPEC_H */

mk/spdk.lib_deps.mk

@@ -45,6 +45,7 @@ DEPDIRS-log :=
DEPDIRS-rte_vhost :=
DEPDIRS-ioat := log
DEPDIRS-idxd := log util
DEPDIRS-sock := log
DEPDIRS-util := log
DEPDIRS-vmd := log

test/common/autotest_common.sh

@@ -221,6 +221,14 @@ function get_config_params() {
config_params+=' --with-rdma'
fi
intel="GenuineIntel"
cpu_vendor=$(grep -i 'vendor' /proc/cpuinfo --max-count=1)
if [[ "$cpu_vendor" != *"$intel"* ]]; then
config_params+=" --without-idxd"
else
config_params+=" --with-idxd"
fi
if [[ -d $CONFIG_FIO_SOURCE_DIR ]]; then
config_params+=" --with-fio=$CONFIG_FIO_SOURCE_DIR"
fi