diff --git a/CONFIG b/CONFIG index 93e0e97838..71dd95c17a 100644 --- a/CONFIG +++ b/CONFIG @@ -169,3 +169,6 @@ CONFIG_FUSE=n # Build with RAID5 support CONFIG_RAID5=n + +# Build with IDXD support +CONFIG_IDXD=n diff --git a/configure b/configure index fb29f4ff2a..f49f6e50ee 100755 --- a/configure +++ b/configure @@ -52,6 +52,8 @@ function usage() echo " Required on some systems to use qat devices. This flag is" echo " effective only with the default dpdk submodule." echo " No path required" + echo " idxd Build the IDXD library and accel framework plug-in module." + echo " Disabled while experimental. Only built for x86 when enabled." echo " crypto Build vbdev crypto module." echo " No path required." echo " fio Build fio_plugin." @@ -447,6 +449,12 @@ for i in "$@"; do --without-raid5) CONFIG[RAID5]=n ;; + --with-idxd) + CONFIG[IDXD]=y + ;; + --without-idxd) + CONFIG[IDXD]=n + ;; --) break ;; @@ -463,6 +471,16 @@ else BUILD_CMD=($CC -o /dev/null -x c $CPPFLAGS $CFLAGS $LDFLAGS) fi +# IDXD uses Intel specific instructions. +if [[ "${CONFIG[IDXD]}" = "y" ]]; then + intel="GenuineIntel" + cpu_vendor=$(grep -i 'vendor' /proc/cpuinfo --max-count=1) + if [[ "$cpu_vendor" != *"$intel"* ]]; then + echo "ERROR: IDXD cannot be used due to CPU incompatiblity." + exit 1 + fi +fi + # Detect architecture and force no ISA-L if non-x86 or non-aarch64 architecture if [[ "${CONFIG[ISAL]}" = "y" ]]; then if [[ $arch != x86_64* ]] && [[ $arch != aarch64* ]]; then diff --git a/include/spdk/env.h b/include/spdk/env.h index 48e4c80c02..7bdcde4630 100644 --- a/include/spdk/env.h +++ b/include/spdk/env.h @@ -706,6 +706,13 @@ struct spdk_pci_driver *spdk_pci_vmd_get_driver(void); */ struct spdk_pci_driver *spdk_pci_ioat_get_driver(void); +/** + * Get the IDXD PCI driver object. + * + * \return PCI driver. + */ +struct spdk_pci_driver *spdk_pci_idxd_get_driver(void); + /** * Get the Virtio PCI driver object. 
* diff --git a/include/spdk/idxd.h b/include/spdk/idxd.h new file mode 100644 index 0000000000..f7f1106fd6 --- /dev/null +++ b/include/spdk/idxd.h @@ -0,0 +1,207 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file + * IDXD DMA engine driver public interface + */ + +#ifndef SPDK_IDXD_H +#define SPDK_IDXD_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "spdk/env.h" + +/** + * Opaque handle for a single IDXD channel. + */ +struct spdk_idxd_io_channel; + +/** + * Opaque handle for a single IDXD device. + */ +struct spdk_idxd_device; + +/** + * Signature for configuring a channel + * + * \param chan IDXD channel to be configured. + * \return 0 on success, negative errno on failure. + */ +int spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan); + +/** + * Reconfigures this channel based on how many current channels there are. + * + * \param chan IDXD channel to be set. + * \param num_channels total number of channels in use. + * \return 0 on success, negative errno on failure. + */ +int spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan, uint32_t num_channels); + +/** + * Signature for callback function invoked when a request is completed. + * + * \param arg User-specified opaque value corresponding to cb_arg from the + * request submission. + * \param status 0 on success, negative errno on failure. + */ +typedef void (*spdk_idxd_req_cb)(void *arg, int status); + +/** + * Callback for spdk_idxd_probe() enumeration. + * + * \param cb_ctx User-specified opaque value corresponding to cb_ctx from spdk_idxd_probe(). + * \param pci_dev PCI device that is being probed. + * + * \return true to attach to this device. + */ +typedef bool (*spdk_idxd_probe_cb)(void *cb_ctx, struct spdk_pci_device *pci_dev); + +/** + * Callback for spdk_idxd_probe() to report a device that has been attached to + * the userspace IDXD driver. + * + * \param cb_ctx User-specified opaque value corresponding to cb_ctx from spdk_idxd_probe(). + * \param pci_dev PCI device that was attached to the driver. + * \param idxd IDXD device that was attached to the driver. 
 + */ +typedef void (*spdk_idxd_attach_cb)(void *cb_ctx, struct spdk_pci_device *pci_dev, + struct spdk_idxd_device *idxd); + +/** + * Enumerate the IDXD devices attached to the system and attach the userspace + * IDXD driver to them if desired. + * + * If called more than once, only devices that are not already attached to the + * SPDK IDXD driver will be reported. + * + * To stop using the controller and release its associated resources, call + * spdk_idxd_detach() with the spdk_idxd_device instance passed to attach_cb. + * + * \param cb_ctx Opaque value which will be passed back in cb_ctx parameter of + * the callbacks. + * \param probe_cb will be called once per IDXD device found in the system. + * \param attach_cb will be called for devices for which probe_cb returned true + * once the IDXD controller has been attached to the userspace driver. + * + * \return 0 on success, -1 on failure. + */ +int spdk_idxd_probe(void *cb_ctx, spdk_idxd_probe_cb probe_cb, spdk_idxd_attach_cb attach_cb); + +/** + * Detach a device reported through attach_cb of spdk_idxd_probe() from the IDXD driver. + * + * \param idxd IDXD device to detach from the driver. + */ +void spdk_idxd_detach(struct spdk_idxd_device *idxd); + +/** + * Sets the IDXD configuration. + * + * \param config_number the configuration number for a valid IDXD config. + */ +void spdk_idxd_set_config(int config_number); + +/** + * Build and submit a DMA engine memory copy request. + * + * This function will build the copy descriptor and then immediately submit + * by writing to the proper device portal. + * + * \param chan IDXD channel to submit request. + * \param dst Destination virtual address. + * \param src Source virtual address. + * \param nbytes Number of bytes to copy. + * \param cb_fn Callback function which will be called when the request is complete. + * \param cb_arg Opaque value which will be passed back as the arg parameter in + * the completion callback. 
+ * + * \return 0 on success, negative errno on failure. + */ +int spdk_idxd_submit_copy(struct spdk_idxd_io_channel *chan, + void *dst, const void *src, uint64_t nbytes, + spdk_idxd_req_cb cb_fn, void *cb_arg); + +/** + * Build and submit a DMA engine memory fill request. + * + * This function will build the fill descriptor and then immediately submit + * by writing to the proper device portal. + * + * \param chan IDXD channel to submit request. + * \param dst Destination virtual address. + * \param fill_pattern Repeating eight-byte pattern to use for memory fill. + * \param nbytes Number of bytes to fill. + * \param cb_fn Callback function which will be called when the request is complete. + * \param cb_arg Opaque value which will be passed back as the cb_arg parameter + * in the completion callback. + * + * \return 0 on success, negative errno on failure. + */ +int spdk_idxd_submit_fill(struct spdk_idxd_io_channel *chan, + void *dst, uint64_t fill_pattern, uint64_t nbytes, + spdk_idxd_req_cb cb_fn, void *cb_arg); + +/** + * Check for completed requests on an IDXD channel. + * + * \param chan IDXD channel to check for completions. + */ +void spdk_idxd_process_events(struct spdk_idxd_io_channel *chan); + +/** + * Returns an IDXD channel for a given IDXD device. + * + * \param idxd IDXD device to get a channel for. + * + * \return pointer to an IDXD channel. + */ +struct spdk_idxd_io_channel *spdk_idxd_get_channel(struct spdk_idxd_device *idxd); + +/** + * Free an IDXD channel. + * + * \param chan IDXD channel to free. 
+ */ +void spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/spdk/pci_ids.h b/include/spdk/pci_ids.h index 3245465f00..659f44ff28 100644 --- a/include/spdk/pci_ids.h +++ b/include/spdk/pci_ids.h @@ -62,6 +62,8 @@ extern "C" { */ #define SPDK_PCI_CLASS_NVME 0x010802 +#define PCI_DEVICE_ID_INTEL_IDXD 0x0b25 + #define PCI_DEVICE_ID_INTEL_IOAT_SNB0 0x3c20 #define PCI_DEVICE_ID_INTEL_IOAT_SNB1 0x3c21 #define PCI_DEVICE_ID_INTEL_IOAT_SNB2 0x3c22 diff --git a/include/spdk_internal/idxd.h b/include/spdk_internal/idxd.h new file mode 100644 index 0000000000..a4f7dde371 --- /dev/null +++ b/include/spdk_internal/idxd.h @@ -0,0 +1,74 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __IDXD_INTERNAL_H__ +#define __IDXD_INTERNAL_H__ + +#include "spdk/stdinc.h" + +#include "spdk/idxd.h" +#include "spdk/queue.h" +#include "spdk/mmio.h" +#include "spdk/bit_array.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define IDXD_MAX_CONFIG_NUM 1 + +enum dsa_opcode { + IDXD_OPCODE_NOOP = 0, + IDXD_OPCODE_BATCH = 1, + IDXD_OPCODE_DRAIN = 2, + IDXD_OPCODE_MEMMOVE = 3, + IDXD_OPCODE_MEMFILL = 4, + IDXD_OPCODE_COMPARE = 5, + IDXD_OPCODE_COMPVAL = 6, + IDXD_OPCODE_CR_DELTA = 7, + IDXD_OPCODE_AP_DELTA = 8, + IDXD_OPCODE_DUALCAST = 9, + IDXD_OPCODE_CRCGEN = 16, + IDXD_OPCODE_COPY_CRC = 17, + IDXD_OPCODE_DIF_CHECK = 18, + IDXD_OPCODE_DIF_INS = 19, + IDXD_OPCODE_DIF_STRP = 20, + IDXD_OPCODE_DIF_UPDT = 21, + IDXD_OPCODE_CFLUSH = 32, +}; + +#ifdef __cplusplus +} +#endif + +#endif /* __IDXD_INTERNAL_H__ */ diff --git a/lib/Makefile b/lib/Makefile index 12ff7844eb..914939ac98 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -43,7 +43,7 @@ DIRS-y += nbd ftl endif DIRS-$(CONFIG_OCF) += env_ocf - +DIRS-$(CONFIG_IDXD) += idxd DIRS-$(CONFIG_VHOST) += vhost DIRS-$(CONFIG_VIRTIO) += virtio DIRS-$(CONFIG_REDUCE) += reduce diff --git a/lib/env_dpdk/Makefile b/lib/env_dpdk/Makefile index 033472e325..8078d6495f 100644 --- a/lib/env_dpdk/Makefile +++ b/lib/env_dpdk/Makefile @@ -40,7 +40,7 @@ SO_SUFFIX := $(SO_VER).$(SO_MINOR) CFLAGS += $(ENV_CFLAGS) C_SRCS = env.c memory.c pci.c init.c threads.c -C_SRCS += pci_nvme.c pci_ioat.c 
pci_virtio.c pci_vmd.c +C_SRCS += pci_nvme.c pci_ioat.c pci_virtio.c pci_vmd.c pci_idxd.c LIBNAME = env_dpdk SPDK_MAP_FILE = $(abspath $(CURDIR)/spdk_env_dpdk.map) diff --git a/lib/env_dpdk/pci_idxd.c b/lib/env_dpdk/pci_idxd.c new file mode 100644 index 0000000000..42f3da1ce7 --- /dev/null +++ b/lib/env_dpdk/pci_idxd.c @@ -0,0 +1,64 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "env_internal.h" + +#include "spdk/pci_ids.h" + +#define SPDK_IDXD_PCI_DEVICE(DEVICE_ID) RTE_PCI_DEVICE(SPDK_PCI_VID_INTEL, DEVICE_ID) +static struct rte_pci_id idxd_driver_id[] = { + {SPDK_IDXD_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IDXD)}, + { .vendor_id = 0, /* sentinel */ }, +}; + +static struct spdk_pci_driver g_idxd_pci_drv = { + .driver = { + .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .id_table = idxd_driver_id, + .probe = pci_device_init, + .remove = pci_device_fini, + .driver.name = "spdk_idxd", + }, + + .cb_fn = NULL, + .cb_arg = NULL, + .is_registered = false, +}; + +struct spdk_pci_driver * +spdk_pci_idxd_get_driver(void) +{ + return &g_idxd_pci_drv; +} + +SPDK_PMD_REGISTER_PCI(g_idxd_pci_drv); diff --git a/lib/idxd/Makefile b/lib/idxd/Makefile new file mode 100644 index 0000000000..1d6bf69103 --- /dev/null +++ b/lib/idxd/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = idxd.c +LIBNAME = idxd + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/lib/idxd/idxd.c b/lib/idxd/idxd.c new file mode 100644 index 0000000000..88d038acd7 --- /dev/null +++ b/lib/idxd/idxd.c @@ -0,0 +1,756 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/env.h" +#include "spdk/util.h" +#include "spdk/memory.h" + +#include "spdk_internal/log.h" +#include "spdk_internal/idxd.h" + +#include "idxd.h" + +pthread_mutex_t g_driver_lock = PTHREAD_MUTEX_INITIALIZER; + +/* + * g_dev_cfg gives us 2 pre-set configurations of DSA to choose from + * via RPC. + */ +struct device_config *g_dev_cfg = NULL; + +/* + * Pre-built configurations. Variations depend on various factors + * including how many different types of target latency profiles there + * are, how many different QOS requirements there might be, etc. 
+ */ +struct device_config g_dev_cfg0 = { + .config_num = 0, + .num_groups = 4, + .num_wqs_per_group = 1, + .num_engines_per_group = 1, + .total_wqs = 4, + .total_engines = 4, +}; + +struct device_config g_dev_cfg1 = { + .config_num = 1, + .num_groups = 2, + .num_wqs_per_group = 2, + .num_engines_per_group = 2, + .total_wqs = 4, + .total_engines = 4, +}; + +static uint32_t +_idxd_read_4(struct spdk_idxd_device *idxd, uint32_t offset) +{ + return spdk_mmio_read_4((uint32_t *)(idxd->reg_base + offset)); +} + +static void +_idxd_write_4(struct spdk_idxd_device *idxd, uint32_t offset, uint32_t value) +{ + spdk_mmio_write_4((uint32_t *)(idxd->reg_base + offset), value); +} + +static uint64_t +_idxd_read_8(struct spdk_idxd_device *idxd, uint32_t offset) +{ + return spdk_mmio_read_8((uint64_t *)(idxd->reg_base + offset)); +} + +static void +_idxd_write_8(struct spdk_idxd_device *idxd, uint32_t offset, uint64_t value) +{ + spdk_mmio_write_8((uint64_t *)(idxd->reg_base + offset), value); +} + +struct spdk_idxd_io_channel * +spdk_idxd_get_channel(struct spdk_idxd_device *idxd) +{ + struct spdk_idxd_io_channel *chan; + + chan = calloc(1, sizeof(struct spdk_idxd_io_channel)); + if (chan == NULL) { + SPDK_ERRLOG("Failed to allocate idxd chan\n"); + return NULL; + } + chan->idxd = idxd; + + return chan; +} + +void +spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan) +{ + free(chan); +} + +int +spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan) +{ + uint32_t num_ring_slots; + + chan->idxd->wq_id++; + if (chan->idxd->wq_id == g_dev_cfg->total_wqs) { + chan->idxd->wq_id = 0; + } + + num_ring_slots = chan->idxd->queues[chan->idxd->wq_id].wqcfg.wq_size; + + chan->ring_ctrl.ring_slots = spdk_bit_array_create(num_ring_slots); + if (chan->ring_ctrl.ring_slots == NULL) { + SPDK_ERRLOG("Failed to allocate bit array for ring\n"); + return -ENOMEM; + } + + /* + * max ring slots can change as channels come and go but we + * start off getting all of the slots for this work 
queue. + */ + chan->ring_ctrl.max_ring_slots = num_ring_slots; + + /* Store the original size of the ring. */ + chan->ring_ctrl.ring_size = num_ring_slots; + + chan->ring_ctrl.data_desc = spdk_zmalloc(num_ring_slots * sizeof(struct idxd_hw_desc), + 0x40, NULL, + SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + if (chan->ring_ctrl.data_desc == NULL) { + SPDK_ERRLOG("Failed to allocate descriptor memory\n"); + spdk_bit_array_free(&chan->ring_ctrl.ring_slots); + return -ENOMEM; + } + + chan->ring_ctrl.completions = spdk_zmalloc(num_ring_slots * sizeof(struct idxd_comp), + 0x40, NULL, + SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + if (chan->ring_ctrl.completions == NULL) { + SPDK_ERRLOG("Failed to allocate completion memory\n"); + spdk_bit_array_free(&chan->ring_ctrl.ring_slots); + spdk_free(chan->ring_ctrl.data_desc); + return -ENOMEM; + } + + chan->ring_ctrl.portal = (char *)chan->idxd->portals + chan->idxd->wq_id * PORTAL_SIZE; + + return 0; +} + +static void +_idxd_drain(struct spdk_idxd_io_channel *chan) +{ + uint32_t index; + int set = 0; + + /* + * TODO this is a temp solution to drain until getting the drain cmd to work, this + * provides equivalent functionality but just doesn't use the device to do it. + */ + do { + spdk_idxd_process_events(chan); + set = 0; + for (index = 0; index < chan->ring_ctrl.max_ring_slots; index++) { + set |= spdk_bit_array_get(chan->ring_ctrl.ring_slots, index); + } + } while (set); +} + +int +spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan, uint32_t num_channels) +{ + uint32_t num_ring_slots; + int rc; + + _idxd_drain(chan); + + assert(spdk_bit_array_count_set(chan->ring_ctrl.ring_slots) == 0); + + if (num_channels == 0) { + spdk_free(chan->ring_ctrl.completions); + spdk_free(chan->ring_ctrl.data_desc); + spdk_bit_array_free(&chan->ring_ctrl.ring_slots); + return 0; + } + + num_ring_slots = chan->ring_ctrl.ring_size / num_channels; + + /* re-allocate our descriptor ring for hw flow control. 
*/ + rc = spdk_bit_array_resize(&chan->ring_ctrl.ring_slots, num_ring_slots); + if (rc < 0) { + SPDK_ERRLOG("Unable to resize channel bit array\n"); + return -ENOMEM; + } + + chan->ring_ctrl.max_ring_slots = num_ring_slots; + + return rc; +} + +/* Called via RPC to select a pre-defined configuration. */ +void +spdk_idxd_set_config(int config_num) +{ + switch (config_num) { + case 0: + g_dev_cfg = &g_dev_cfg0; + break; + case 1: + g_dev_cfg = &g_dev_cfg1; + break; + default: + g_dev_cfg = &g_dev_cfg0; + SPDK_ERRLOG("Invalid config, using default\n"); + break; + } +} + +static int +idxd_unmap_pci_bar(struct spdk_idxd_device *idxd, int bar) +{ + int rc = 0; + void *addr = NULL; + + if (bar == IDXD_MMIO_BAR) { + addr = (void *)idxd->reg_base; + } else if (bar == IDXD_WQ_BAR) { + addr = (void *)idxd->portals; + } + + if (addr) { + rc = spdk_pci_device_unmap_bar(idxd->device, bar, addr); /* BAR index must match addr */ + } + return rc; +} + +static int +idxd_map_pci_bars(struct spdk_idxd_device *idxd) +{ + int rc; + void *addr; + uint64_t phys_addr, size; + + rc = spdk_pci_device_map_bar(idxd->device, IDXD_MMIO_BAR, &addr, &phys_addr, &size); + if (rc != 0 || addr == NULL) { + SPDK_ERRLOG("pci_device_map_range failed with error code %d\n", rc); + return -EINVAL; /* same errno as the WQ BAR failure path below */ + } + idxd->reg_base = addr; + + rc = spdk_pci_device_map_bar(idxd->device, IDXD_WQ_BAR, &addr, &phys_addr, &size); + if (rc != 0 || addr == NULL) { + SPDK_ERRLOG("pci_device_map_range failed with error code %d\n", rc); + rc = idxd_unmap_pci_bar(idxd, IDXD_MMIO_BAR); + if (rc) { + SPDK_ERRLOG("unable to unmap MMIO bar\n"); + } + return -EINVAL; + } + idxd->portals = addr; + + return 0; +} + +/* Used for control commands, not for descriptor submission. 
*/ +static int +idxd_wait_cmd(struct spdk_idxd_device *idxd, int _timeout) +{ + uint32_t timeout = _timeout; + union idxd_cmdsts_reg cmd_status = {}; + + cmd_status.raw = _idxd_read_4(idxd, IDXD_CMDSTS_OFFSET); + while (cmd_status.active && --timeout) { + usleep(1); + cmd_status.raw = _idxd_read_4(idxd, IDXD_CMDSTS_OFFSET); + } + + /* Check for timeout */ + if (timeout == 0 && cmd_status.active) { + SPDK_ERRLOG("Command timeout, waited %d\n", _timeout); /* _timeout is a signed int */ + return -EBUSY; + } + + /* Check for error */ + if (cmd_status.err) { + SPDK_ERRLOG("Command status reg reports error 0x%x\n", cmd_status.err); + return -EINVAL; + } + + return 0; +} + +static int +idxd_reset_dev(struct spdk_idxd_device *idxd) +{ + int rc; + + _idxd_write_4(idxd, IDXD_CMD_OFFSET, IDXD_RESET_DEVICE << IDXD_CMD_SHIFT); + rc = idxd_wait_cmd(idxd, IDXD_REGISTER_TIMEOUT_US); + if (rc < 0) { + SPDK_ERRLOG("Error resetting device %d\n", rc); /* rc is a negative errno */ + } + + return rc; +} + +/* + * Build group config based on getting info from the device combined + * with the defined configuration. Once built, it is written to the + * device. 
+ */ +static int +idxd_group_config(struct spdk_idxd_device *idxd) +{ + int i; + uint64_t base_offset; + + assert(g_dev_cfg->num_groups <= idxd->registers.groupcap.num_groups); + idxd->groups = calloc(idxd->registers.groupcap.num_groups, sizeof(struct idxd_group)); + if (idxd->groups == NULL) { + SPDK_ERRLOG("Failed to allocate group memory\n"); + return -ENOMEM; + } + + assert(g_dev_cfg->total_engines <= idxd->registers.enginecap.num_engines); + for (i = 0; i < g_dev_cfg->total_engines; i++) { + idxd->groups[i % g_dev_cfg->num_groups].grpcfg.engines |= (1 << i); + } + + assert(g_dev_cfg->total_wqs <= idxd->registers.wqcap.num_wqs); + for (i = 0; i < g_dev_cfg->total_wqs; i++) { + idxd->groups[i % g_dev_cfg->num_groups].grpcfg.wqs[0] |= (1 << i); + } + + for (i = 0; i < g_dev_cfg->num_groups; i++) { + idxd->groups[i].idxd = idxd; + idxd->groups[i].id = i; + + /* Divide BW tokens evenly */ + idxd->groups[i].grpcfg.flags.tokens_allowed = + idxd->registers.groupcap.total_tokens / g_dev_cfg->num_groups; + } + + /* + * Now write the group config to the device for all groups. We write + * to the max number of groups in order to 0 out the ones we didn't + * configure. + */ + for (i = 0 ; i < idxd->registers.groupcap.num_groups; i++) { + + base_offset = idxd->grpcfg_offset + i * 64; + + /* GRPWQCFG, work queues config */ + _idxd_write_8(idxd, base_offset, idxd->groups[i].grpcfg.wqs[0]); + + /* GRPENGCFG, engine config */ + _idxd_write_8(idxd, base_offset + CFG_ENGINE_OFFSET, idxd->groups[i].grpcfg.engines); + + /* GRPFLAGS, flags config */ + _idxd_write_8(idxd, base_offset + CFG_FLAG_OFFSET, idxd->groups[i].grpcfg.flags.raw); + } + + return 0; +} + +/* + * Build work queue (WQ) config based on getting info from the device combined + * with the defined configuration. Once built, it is written to the device. 
+ */ +static int +idxd_wq_config(struct spdk_idxd_device *idxd) +{ + int i, j; + struct idxd_wq *queue; + u_int32_t wq_size = idxd->registers.wqcap.total_wq_size / g_dev_cfg->total_wqs; + + SPDK_NOTICELOG("Total ring slots available space 0x%x, so per work queue is 0x%x\n", + idxd->registers.wqcap.total_wq_size, wq_size); + assert(g_dev_cfg->total_wqs <= IDXD_MAX_QUEUES); + assert(g_dev_cfg->total_wqs <= idxd->registers.wqcap.num_wqs); + assert(LOG2_WQ_MAX_BATCH <= idxd->registers.gencap.max_batch_shift); + assert(LOG2_WQ_MAX_XFER <= idxd->registers.gencap.max_xfer_shift); + + idxd->queues = calloc(1, idxd->registers.wqcap.num_wqs * sizeof(struct idxd_wq)); + if (idxd->queues == NULL) { + SPDK_ERRLOG("Failed to allocate queue memory\n"); + return -ENOMEM; + } + + for (i = 0; i < g_dev_cfg->total_wqs; i++) { + queue = &idxd->queues[i]; + queue->wqcfg.wq_size = wq_size; + queue->wqcfg.mode = WQ_MODE_DEDICATED; + queue->wqcfg.max_batch_shift = LOG2_WQ_MAX_BATCH; + queue->wqcfg.max_xfer_shift = LOG2_WQ_MAX_XFER; + queue->wqcfg.wq_state = WQ_ENABLED; + queue->wqcfg.priority = WQ_PRIORITY_1; + + /* Not part of the config struct */ + queue->idxd = idxd; + queue->group = &idxd->groups[i % g_dev_cfg->num_groups]; + } + + /* + * Now write the work queue config to the device for all wq space + */ + for (i = 0 ; i < idxd->registers.wqcap.num_wqs; i++) { + queue = &idxd->queues[i]; + for (j = 0 ; j < WQCFG_NUM_DWORDS; j++) { + _idxd_write_4(idxd, idxd->wqcfg_offset + i * 32 + j * 4, + queue->wqcfg.raw[j]); + } + } + + return 0; +} + +static int +idxd_device_configure(struct spdk_idxd_device *idxd) +{ + int i, rc = 0; + union idxd_offsets_register offsets_reg; + union idxd_genstatus_register genstatus_reg; + + /* + * Map BAR0 and BAR2 + */ + rc = idxd_map_pci_bars(idxd); + if (rc) { + return rc; + } + + /* + * Reset the device + */ + rc = idxd_reset_dev(idxd); + if (rc) { + goto err_reset; + } + + /* + * Read in config registers + */ + idxd->registers.version = 
_idxd_read_4(idxd, IDXD_VERSION_OFFSET); + idxd->registers.gencap.raw = _idxd_read_8(idxd, IDXD_GENCAP_OFFSET); + idxd->registers.wqcap.raw = _idxd_read_8(idxd, IDXD_WQCAP_OFFSET); + idxd->registers.groupcap.raw = _idxd_read_8(idxd, IDXD_GRPCAP_OFFSET); + idxd->registers.enginecap.raw = _idxd_read_8(idxd, IDXD_ENGCAP_OFFSET); + for (i = 0; i < IDXD_OPCAP_WORDS; i++) { + idxd->registers.opcap.raw[i] = + _idxd_read_8(idxd, i * sizeof(uint64_t) + IDXD_OPCAP_OFFSET); + } + offsets_reg.raw[0] = _idxd_read_8(idxd, IDXD_TABLE_OFFSET); + offsets_reg.raw[1] = _idxd_read_8(idxd, IDXD_TABLE_OFFSET + sizeof(uint64_t)); + idxd->grpcfg_offset = offsets_reg.grpcfg * IDXD_TABLE_OFFSET_MULT; + idxd->wqcfg_offset = offsets_reg.wqcfg * IDXD_TABLE_OFFSET_MULT; + idxd->ims_offset = offsets_reg.ims * IDXD_TABLE_OFFSET_MULT; + idxd->msix_perm_offset = offsets_reg.msix_perm * IDXD_TABLE_OFFSET_MULT; + idxd->perfmon_offset = offsets_reg.perfmon * IDXD_TABLE_OFFSET_MULT; + + /* + * Configure groups and work queues. 
+ */ + rc = idxd_group_config(idxd); + if (rc) { + goto err_group_cfg; + } + + rc = idxd_wq_config(idxd); + if (rc) { + goto err_wq_cfg; + } + + /* + * Enable the device + */ + genstatus_reg.raw = _idxd_read_4(idxd, IDXD_GENSTATUS_OFFSET); + assert(genstatus_reg.state == IDXD_DEVICE_STATE_DISABLED); + + _idxd_write_4(idxd, IDXD_CMD_OFFSET, IDXD_ENABLE_DEV << IDXD_CMD_SHIFT); + rc = idxd_wait_cmd(idxd, IDXD_REGISTER_TIMEOUT_US); + genstatus_reg.raw = _idxd_read_4(idxd, IDXD_GENSTATUS_OFFSET); + if ((rc < 0) || (genstatus_reg.state != IDXD_DEVICE_STATE_ENABLED)) { + rc = -EINVAL; + SPDK_ERRLOG("Error enabling device %u\n", rc); + goto err_device_enable; + } + + genstatus_reg.raw = spdk_mmio_read_4((uint32_t *)(idxd->reg_base + IDXD_GENSTATUS_OFFSET)); + assert(genstatus_reg.state == IDXD_DEVICE_STATE_ENABLED); + + /* + * Enable the work queues that we've configured + */ + for (i = 0; i < g_dev_cfg->total_wqs; i++) { + _idxd_write_4(idxd, IDXD_CMD_OFFSET, + (IDXD_ENABLE_WQ << IDXD_CMD_SHIFT) | i); + rc = idxd_wait_cmd(idxd, IDXD_REGISTER_TIMEOUT_US); + if (rc < 0) { + SPDK_ERRLOG("Error enabling work queues 0x%x\n", rc); + goto err_wq_enable; + } + } + + if ((rc == 0) && (genstatus_reg.state == IDXD_DEVICE_STATE_ENABLED)) { + SPDK_NOTICELOG("Device enabled, version 0x%x gencap: 0x%lx\n", + idxd->registers.version, + idxd->registers.gencap.raw); + + } + + return rc; +err_wq_enable: +err_device_enable: + free(idxd->queues); +err_wq_cfg: + free(idxd->groups); +err_group_cfg: +err_reset: + idxd_unmap_pci_bar(idxd, IDXD_MMIO_BAR); + idxd_unmap_pci_bar(idxd, IDXD_MMIO_BAR); + + return rc; +} + +static void +idxd_device_destruct(struct spdk_idxd_device *idxd) +{ + idxd_unmap_pci_bar(idxd, IDXD_MMIO_BAR); + idxd_unmap_pci_bar(idxd, IDXD_WQ_BAR); + free(idxd->groups); + free(idxd->queues); + free(idxd); +} + +/* Caller must hold g_driver_lock */ +static struct spdk_idxd_device * +idxd_attach(struct spdk_pci_device *device) +{ + struct spdk_idxd_device *idxd; + uint32_t 
cmd_reg; + int rc; + + idxd = calloc(1, sizeof(struct spdk_idxd_device)); + if (idxd == NULL) { + SPDK_ERRLOG("Failed to allocate memory for idxd device.\n"); + return NULL; + } + + idxd->device = device; + + /* Enable PCI busmaster. */ + spdk_pci_device_cfg_read32(device, &cmd_reg, 4); + cmd_reg |= 0x4; + spdk_pci_device_cfg_write32(device, cmd_reg, 4); + + rc = idxd_device_configure(idxd); + if (rc) { + goto err; + } + + return idxd; +err: + idxd_device_destruct(idxd); + return NULL; +} + +struct idxd_enum_ctx { + spdk_idxd_probe_cb probe_cb; + spdk_idxd_attach_cb attach_cb; + void *cb_ctx; +}; + +/* This function must only be called while holding g_driver_lock */ +static int +idxd_enum_cb(void *ctx, struct spdk_pci_device *pci_dev) +{ + struct idxd_enum_ctx *enum_ctx = ctx; + struct spdk_idxd_device *idxd; + + if (enum_ctx->probe_cb(enum_ctx->cb_ctx, pci_dev)) { + idxd = idxd_attach(pci_dev); + if (idxd == NULL) { + SPDK_ERRLOG("idxd_attach() failed\n"); + return -EINVAL; + } + + enum_ctx->attach_cb(enum_ctx->cb_ctx, pci_dev, idxd); + } + + return 0; +} + +int +spdk_idxd_probe(void *cb_ctx, spdk_idxd_probe_cb probe_cb, spdk_idxd_attach_cb attach_cb) +{ + int rc; + struct idxd_enum_ctx enum_ctx; + + enum_ctx.probe_cb = probe_cb; + enum_ctx.attach_cb = attach_cb; + enum_ctx.cb_ctx = cb_ctx; + + pthread_mutex_lock(&g_driver_lock); + rc = spdk_pci_enumerate(spdk_pci_idxd_get_driver(), idxd_enum_cb, &enum_ctx); + pthread_mutex_unlock(&g_driver_lock); + + return rc; +} + +void +spdk_idxd_detach(struct spdk_idxd_device *idxd) +{ + idxd_device_destruct(idxd); +} + +int +spdk_idxd_submit_copy(struct spdk_idxd_io_channel *chan, void *dst, const void *src, + uint64_t nbytes, + spdk_idxd_req_cb cb_fn, void *cb_arg) +{ + uint32_t index; + struct idxd_hw_desc *desc; + struct idxd_comp *comp; + + index = spdk_bit_array_find_first_clear(chan->ring_ctrl.ring_slots, 0); + if (index == UINT32_MAX) { + /* ran out of ring slots */ + return -EBUSY; + } + + 
spdk_bit_array_set(chan->ring_ctrl.ring_slots, index); + + desc = &chan->ring_ctrl.data_desc[index]; + comp = &chan->ring_ctrl.completions[index]; + + desc->opcode = IDXD_OPCODE_MEMMOVE; + desc->flags = IDXD_FLAG_COMPLETION_ADDR_VALID | IDXD_FLAG_REQUEST_COMPLETION; + desc->completion_addr = (uintptr_t)&comp->hw; + desc->src_addr = (uintptr_t)src; + desc->dst_addr = (uintptr_t)dst; + desc->xfer_size = nbytes; + comp->cb_arg = (uint64_t)cb_arg; + comp->cb_fn = cb_fn; + + movdir64b((uint64_t *)chan->ring_ctrl.portal, desc); + + return 0; +} + +int +spdk_idxd_submit_fill(struct spdk_idxd_io_channel *chan, void *dst, uint64_t fill_pattern, + uint64_t nbytes, + spdk_idxd_req_cb cb_fn, void *cb_arg) +{ + uint32_t index; + struct idxd_hw_desc *desc; + struct idxd_comp *comp; + + index = spdk_bit_array_find_first_clear(chan->ring_ctrl.ring_slots, 0); + if (index == UINT32_MAX) { + /* ran out of ring slots */ + return -EBUSY; + } + + spdk_bit_array_set(chan->ring_ctrl.ring_slots, index); + + desc = &chan->ring_ctrl.data_desc[index]; + comp = &chan->ring_ctrl.completions[index]; + + desc->opcode = IDXD_OPCODE_MEMFILL; + desc->flags = IDXD_FLAG_COMPLETION_ADDR_VALID | IDXD_FLAG_REQUEST_COMPLETION; + desc->completion_addr = (uintptr_t)&comp->hw; + desc->pattern = fill_pattern; + desc->dst_addr = (uintptr_t)dst; + desc->xfer_size = nbytes; + comp->cb_arg = (uint64_t)cb_arg; + comp->cb_fn = cb_fn; + + movdir64b((uint64_t *)chan->ring_ctrl.portal, desc); + + return 0; +} + +static void +_dump_error_reg(struct spdk_idxd_io_channel *chan) +{ + uint64_t sw_error_0; + uint16_t i; + + sw_error_0 = _idxd_read_8(chan->idxd, IDXD_SWERR_OFFSET); + + SPDK_NOTICELOG("SW Error bits set:"); + for (i = 0; i < CHAR_BIT; i++) { + if ((1ULL << i) & sw_error_0) { + SPDK_NOTICELOG(" %d\n", i); + } + } + SPDK_NOTICELOG("SW Error error code: %#x\n", (uint8_t)(sw_error_0 >> 8)); + SPDK_NOTICELOG("SW Error WQ index: %u\n", (uint8_t)(sw_error_0 >> 16)); + SPDK_NOTICELOG("SW Error Operation: %u\n", 
(uint8_t)(sw_error_0 >> 32)); +} + +/* + * TODO: Experiment with different methods of reaping completions for performance + * once we have real silicon. + */ +void +spdk_idxd_process_events(struct spdk_idxd_io_channel *chan) +{ + uint16_t index; + struct idxd_comp *comp; + uint64_t sw_error_0; + int status = 0; + + for (index = 0; index < chan->ring_ctrl.max_ring_slots; index++) { + if (spdk_bit_array_get(chan->ring_ctrl.ring_slots, index)) { + comp = &chan->ring_ctrl.completions[index]; + if (comp->hw.status == 1) { + sw_error_0 = _idxd_read_8(chan->idxd, IDXD_SWERR_OFFSET); + if (sw_error_0 & 0x1) { + _dump_error_reg(chan); + status = -EINVAL; + } + + comp->cb_fn((void *)comp->cb_arg, status); + comp->hw.status = status = 0; + spdk_bit_array_clear(chan->ring_ctrl.ring_slots, index); + } + } + } +} + +SPDK_LOG_REGISTER_COMPONENT("idxd", SPDK_LOG_IDXD) diff --git a/lib/idxd/idxd.h b/lib/idxd/idxd.h new file mode 100644 index 0000000000..be5de8af10 --- /dev/null +++ b/lib/idxd/idxd.h @@ -0,0 +1,165 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __IDXD_H__ +#define __IDXD_H__ + +#include "spdk/stdinc.h" + +#include "spdk/idxd.h" +#include "spdk/queue.h" +#include "spdk/mmio.h" +#include "spdk/bit_array.h" + +#include "idxd_spec.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* TODO: get the gcc intrinsic to work. */ +#define nop() asm volatile ("nop") +static inline void movdir64b(void *dst, const void *src) +{ + asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02" + : "=m"(*(char *)dst) + : "d"(src), "a"(dst)); +} + +#define IDXD_REGISTER_TIMEOUT_US 50 + +/* TODO: make some of these RPC selectable */ +#define WQ_MODE_DEDICATED 1 +#define LOG2_WQ_MAX_BATCH 8 /* 2^8 = 256 */ +#define LOG2_WQ_MAX_XFER 30 /* 2^30 = 1073741824 */ +#define WQCFG_NUM_DWORDS 8 +#define WQ_PRIORITY_1 1 +#define IDXD_MAX_QUEUES 64 + + +struct device_config { + uint8_t config_num; + uint8_t num_wqs_per_group; + uint8_t num_engines_per_group; + uint8_t num_groups; + uint16_t total_wqs; + uint16_t total_engines; +}; + +struct idxd_ring_control { + void *portal; + + uint16_t ring_size; + + /* + * Rings for this channel, one for descriptors and one + * for completions, share the same index. 
/*
 * This struct wraps the hardware completion record which is 32 bytes in
 * size and must be 32 byte aligned.
 *
 * The hardware record is the first member, so the address of an element is
 * also the address of its completion record. The remaining members are
 * software-only bookkeeping. The static assert pins the total size at 64
 * bytes, which assumes an 8-byte function pointer.
 *
 * NOTE(review): the 64-byte size presumably keeps every element of a
 * completions array 32-byte aligned as long as the array base is; the
 * allocation is not visible here, so confirm against it.
 */
struct idxd_comp {
	/* Completion record; its status byte is polled when reaping slots. */
	struct idxd_hw_comp_record	hw;
	/* Caller's callback argument, stored as an integer. */
	uint64_t			cb_arg;
	/* Callback invoked when the slot is reaped. */
	spdk_idxd_req_cb		cb_fn;
	/* Padding up to the asserted 64-byte size. */
	uint64_t			pad1;
	uint64_t			pad2;
} __attribute__((packed));
SPDK_STATIC_ASSERT(sizeof(struct idxd_comp) == 64, "size mismatch");
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/* BAR indices for the MMIO register space and the work queue (WQ) space. */
#define IDXD_MMIO_BAR		0
#define IDXD_WQ_BAR		2
#define PORTAL_SIZE		(4096 * 4)

#define CFG_ENGINE_OFFSET	0x20
#define CFG_FLAG_OFFSET		0x28

/* Command codes (enum idxd_cmds) are shifted left by this much when written to IDXD_CMD_OFFSET. */
#define IDXD_CMD_SHIFT		20

/* Register byte offsets, listed in ascending order; each offset is defined exactly once. */
#define IDXD_VERSION_OFFSET	0x00
#define IDXD_GENCAP_OFFSET	0x10
#define IDXD_WQCAP_OFFSET	0x20
#define IDXD_GRPCAP_OFFSET	0x30
#define IDXD_ENGCAP_OFFSET	0x38
#define IDXD_OPCAP_OFFSET	0x40
#define IDXD_TABLE_OFFSET	0x60
#define IDXD_GENCFG_OFFSET	0x80
#define IDXD_GENCTRL_OFFSET	0x88
#define IDXD_GENSTATUS_OFFSET	0x90
#define IDXD_INTCAUSE_OFFSET	0x98
#define IDXD_CMD_OFFSET		0xa0
#define IDXD_CMDSTS_OFFSET	0xa8
#define IDXD_SWERR_OFFSET	0xc0

/* Multiplier that turns a field of union idxd_offsets_register into a byte offset. */
#define IDXD_TABLE_OFFSET_MULT	0x100

/* Number of 64-bit words that make up the OPCAP register. */
#define IDXD_OPCAP_WORDS	0x4

/* Bits of the descriptor flags field. */
#define IDXD_FLAG_FENCE			(1 << 0)
#define IDXD_FLAG_COMPLETION_ADDR_VALID	(1 << 2)
#define IDXD_FLAG_REQUEST_COMPLETION	(1 << 3)
#define IDXD_FLAG_CACHE_CONTROL		(1 << 8)
/*
 * Completion status codes. IDXD is a family of devices; DSA is the only one
 * currently supported, hence the enum name.
 */
enum dsa_completion_status {
	IDXD_COMP_NONE			= 0x00,
	IDXD_COMP_SUCCESS		= 0x01,
	IDXD_COMP_SUCCESS_PRED		= 0x02,
	IDXD_COMP_PAGE_FAULT_NOBOF	= 0x03,
	IDXD_COMP_PAGE_FAULT_IR		= 0x04,
	IDXD_COMP_BATCH_FAIL		= 0x05,
	IDXD_COMP_BATCH_PAGE_FAULT	= 0x06,
	IDXD_COMP_DR_OFFSET_NOINC	= 0x07,
	IDXD_COMP_DR_OFFSET_ERANGE	= 0x08,
	IDXD_COMP_DIF_ERR		= 0x09,
	/* 0x0a - 0x0f are not defined here */
	IDXD_COMP_BAD_OPCODE		= 0x10,
	IDXD_COMP_INVALID_FLAGS		= 0x11,
	IDXD_COMP_NOZERO_RESERVE	= 0x12,
	IDXD_COMP_XFER_ERANGE		= 0x13,
	IDXD_COMP_DESC_CNT_ERANGE	= 0x14,
	IDXD_COMP_DR_ERANGE		= 0x15,
	IDXD_COMP_OVERLAP_BUFFERS	= 0x16,
	IDXD_COMP_DCAST_ERR		= 0x17,
	IDXD_COMP_DESCLIST_ALIGN	= 0x18,
	IDXD_COMP_INT_HANDLE_INVAL	= 0x19,
	IDXD_COMP_CRA_XLAT		= 0x1a,
	IDXD_COMP_CRA_ALIGN		= 0x1b,
	IDXD_COMP_ADDR_ALIGN		= 0x1c,
	IDXD_COMP_PRIV_BAD		= 0x1d,
	IDXD_COMP_TRAFFIC_CLASS_CONF	= 0x1e,
	IDXD_COMP_PFAULT_RDBA		= 0x1f,
	IDXD_COMP_HW_ERR1		= 0x20,
	IDXD_COMP_HW_ERR_DRB		= 0x21,
	IDXD_COMP_TRANSLATION_FAIL	= 0x22,
};

/* Work queue enable state. */
enum idxd_wq_state {
	WQ_DISABLED	= 0x0,
	WQ_ENABLED	= 0x1,
};

/* Work queue flag identifiers. */
enum idxd_wq_flag {
	WQ_FLAG_DEDICATED	= 0x0,
	WQ_FLAG_BOF		= 0x1,
};

/* Work queue owner types. */
enum idxd_wq_type {
	WQT_NONE	= 0x0,
	WQT_KERNEL	= 0x1,
	WQT_USER	= 0x2,
	WQT_MDEV	= 0x3,
};

/* Device states; compared against the state field of the GENSTATUS register. */
enum idxd_dev_state {
	IDXD_DEVICE_STATE_DISABLED	= 0x0,
	IDXD_DEVICE_STATE_ENABLED	= 0x1,
	IDXD_DEVICE_STATE_DRAIN		= 0x2,
	IDXD_DEVICE_STATE_HALT		= 0x3,
};

/* Device reset types. */
enum idxd_device_reset_type {
	IDXD_DEVICE_RESET_SOFTWARE	= 0x0,
	IDXD_DEVICE_RESET_FLR		= 0x1,
	IDXD_DEVICE_RESET_WARM		= 0x2,
	IDXD_DEVICE_RESET_COLD		= 0x3,
};

/* Command codes; shifted by IDXD_CMD_SHIFT when written to the command register. */
enum idxd_cmds {
	IDXD_ENABLE_DEV		= 0x1,
	IDXD_DISABLE_DEV	= 0x2,
	IDXD_DRAIN_ALL		= 0x3,
	IDXD_ABORT_ALL		= 0x4,
	IDXD_RESET_DEVICE	= 0x5,
	IDXD_ENABLE_WQ		= 0x6,
	IDXD_DISABLE_WQ		= 0x7,
	IDXD_DRAIN_WQ		= 0x8,
	IDXD_ABORT_WQ		= 0x9,
	IDXD_RESET_WQ		= 0xa,
};
/* Work queue hardware states; the values are consecutive starting at zero. */
enum idxd_wq_hw_state {
	IDXD_WQ_DEV_DISABLED,	/* 0 */
	IDXD_WQ_DEV_ENABLED,	/* 1 */
	IDXD_WQ_DEV_BUSY,	/* 2 */
};
/*
 * Completion record layout. The struct is packed and the static assert below
 * pins it at 32 bytes: 16 bytes of common fields followed by a 16-byte union
 * of per-operation results.
 */
struct idxd_hw_comp_record {
	/* volatile because it is polled for a change not made by the compiler-visible code. */
	volatile uint8_t	status;
	union {
		uint8_t		result;
		uint8_t		dif_status;
	};
	uint16_t		rsvd;
	uint32_t		bytes_completed;
	uint64_t		fault_addr;
	/* Per-operation view of the last 16 bytes; op_specific spans the whole union. */
	union {
		uint16_t	delta_rec_size;
		uint16_t	crc_val;
		struct {
			uint32_t	dif_chk_ref_tag;
			uint16_t	dif_chk_app_tag_mask;
			uint16_t	dif_chk_app_tag;
		};
		struct dif_ins_comp {
			uint64_t	rsvd;
			uint32_t	ref_tag;
			uint16_t	app_tag_mask;
			uint16_t	app_tag;
		} dif_ins_comp;
		struct dif_upd_comp {
			uint32_t	src_ref_tag;
			uint16_t	src_app_tag_mask;
			uint16_t	src_app_tag;
			uint32_t	dest_ref_tag;
			uint16_t	dest_app_tag_mask;
			uint16_t	dest_app_tag;
		} dif_upd_comp;
		uint8_t		op_specific[16];
	};
} __attribute__((packed));
SPDK_STATIC_ASSERT(sizeof(struct idxd_hw_comp_record) == 32, "size mismatch");
/*
 * 128-bit table offsets register, accessible either as two raw 64-bit words
 * or as 16-bit fields. The fields are not byte offsets: the driver multiplies
 * each one by IDXD_TABLE_OFFSET_MULT to obtain the offset of the
 * corresponding table.
 */
union idxd_offsets_register {
	struct {
		uint64_t	grpcfg: 16;	/* group configuration table */
		uint64_t	wqcfg: 16;	/* work queue configuration table */
		uint64_t	msix_perm: 16;
		uint64_t	ims: 16;
		uint64_t	perfmon: 16;
		uint64_t	rsvd: 48;	/* pads the bit-fields to 128 bits */
	} __attribute__((packed));
	/* raw[0] is the low 64 bits, raw[1] the 64 bits that follow. */
	uint64_t	raw[2];
};
SPDK_STATIC_ASSERT(sizeof(union idxd_offsets_register) == 16, "size mismatch");
/*
 * Group configuration entry.
 *
 * The members add up to 44 bytes (4 * 8 + 8 + 4). The struct is not packed,
 * so the asserted size of 48 relies on the compiler adding 4 bytes of tail
 * padding to satisfy the 8-byte alignment of the uint64_t members.
 */
struct idxd_grpcfg {
	uint64_t	wqs[4];
	uint64_t	engines;
	union idxd_group_flags	flags;
};
SPDK_STATIC_ASSERT(sizeof(struct idxd_grpcfg) == 48, "size mismatch");
-45,6 +45,7 @@ DEPDIRS-log := DEPDIRS-rte_vhost := DEPDIRS-ioat := log +DEPDIRS-idxd := log util DEPDIRS-sock := log DEPDIRS-util := log DEPDIRS-vmd := log diff --git a/test/common/autotest_common.sh b/test/common/autotest_common.sh index c08b5c17dc..165531bec1 100755 --- a/test/common/autotest_common.sh +++ b/test/common/autotest_common.sh @@ -221,6 +221,14 @@ function get_config_params() { config_params+=' --with-rdma' fi + intel="GenuineIntel" + cpu_vendor=$(grep -i 'vendor' /proc/cpuinfo --max-count=1) + if [[ "$cpu_vendor" != *"$intel"* ]]; then + config_params+=" --without-idxd" + else + config_params+=" --with-idxd" + fi + if [[ -d $CONFIG_FIO_SOURCE_DIR ]]; then config_params+=" --with-fio=$CONFIG_FIO_SOURCE_DIR" fi