numam-spdk/lib/nvme/nvme_impl.h

330 lines
9.6 KiB
C
Raw Normal View History

/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* \file
* NVMe driver integration callbacks
*
* This file describes the callback functions required to integrate
* the userspace NVMe driver for a specific implementation. This
* implementation is specific for DPDK. Users would
* revise it as necessary for their own particular environment if not
* using it within the DPDK framework.
*/
#ifndef __NVME_IMPL_H__
#define __NVME_IMPL_H__
#include "spdk/vtophys.h"
#include "spdk/pci.h"
#include "spdk/nvme_spec.h"
#include <assert.h>
#include <unistd.h>
#include <rte_config.h>
#include <rte_cycles.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#ifdef USE_PCIACCESS
#include <pciaccess.h>
#else
#include <rte_pci.h>
#endif
#include "spdk/pci.h"
#include "spdk/pci_ids.h"
#include "spdk/nvme_spec.h"
/**
* \page nvme_driver_integration NVMe Driver Integration
*
* Users can integrate the userspace NVMe driver into their environment
* by implementing the callbacks in nvme_impl.h. These callbacks
* enable users to specify how to allocate pinned and physically
* contiguous memory, perform virtual to physical address
* translations, log messages, PCI configuration and register mapping,
* and a number of other facilities that may differ depending on the
* environment.
*/
/**
 * Obtain a pinned, physically contiguous buffer for the driver.
 *
 * The buffer comes from rte_zmalloc() and its physical address is looked up
 * with rte_malloc_virt2phy().
 *
 * Note: this is only called while the driver initializes. Per-I/O
 * allocations made during normal operation go through the
 * nvme_alloc_request callback instead.
 *
 * \param tag        label forwarded to rte_zmalloc().
 * \param size       number of bytes requested.
 * \param align      alignment forwarded to rte_zmalloc().
 * \param phys_addr  output; always written with the result of
 *                   rte_malloc_virt2phy() on the returned pointer, even when
 *                   that pointer is NULL.
 *
 * \return whatever rte_zmalloc() returned.
 */
static inline void *
nvme_malloc(const char *tag, size_t size, unsigned align, uint64_t *phys_addr)
{
	void *vaddr;

	vaddr = rte_zmalloc(tag, size, align);
	*phys_addr = rte_malloc_virt2phy(vaddr);

	return vaddr;
}
/**
 * Release a buffer that was obtained from nvme_malloc().
 *
 * Forwards directly to rte_free().
 */
#define nvme_free(ptr) rte_free(ptr)
/**
 * Emit a message on behalf of the NVMe driver.
 *
 * The ctrlr argument is accepted so callers can identify the controller, but
 * this implementation ignores it and simply formats the message with
 * printf(). The expression evaluates to printf()'s return value.
 */
#define nvme_printf(ctrlr, fmt, ...) printf(fmt, ##__VA_ARGS__)
/**
 * Check a driver invariant; a failure means something has gone
 * catastrophically wrong inside the driver.
 *
 * This implementation maps onto assert(). The second argument (the message)
 * is discarded and never evaluated.
 */
#define nvme_assert(cond, msg) assert(cond)
/**
 * Translate a virtual address to its physical address by forwarding to
 * spdk_vtophys().
 */
#define nvme_vtophys(vaddr) spdk_vtophys(vaddr)

/** Alias for SPDK_VTOPHYS_ERROR, for use alongside nvme_vtophys(). */
#define NVME_VTOPHYS_ERROR SPDK_VTOPHYS_ERROR
/** Mempool that backs nvme_request objects; only declared here, defined elsewhere. */
extern struct rte_mempool *request_mempool;

/**
 * Fetch a buffer for an nvme_request object from request_mempool.
 *
 * These objects are allocated for each I/O. They do not need to be pinned
 * nor physically contiguous.
 *
 * \param bufp  address of the pointer that receives the object; it is cast
 *              to (void **) and handed to rte_mempool_get().
 *
 * The macro expands to the rte_mempool_get() call expression, so it yields
 * that call's return value (DPDK documents this as 0 on success and a
 * negative value on failure). The expansion deliberately has no trailing
 * semicolon: the caller supplies it, which keeps the macro usable inside
 * expressions and unbraced if/else bodies.
 */
#define nvme_alloc_request(bufp) rte_mempool_get(request_mempool, (void **)(bufp))
/**
 * Hand a buffer obtained through nvme_alloc_request() back to
 * request_mempool via rte_mempool_put().
 */
#define nvme_dealloc_request(req) rte_mempool_put(request_mempool, (req))
/**
 * Get a monotonic timestamp counter (used for measuring timeouts during
 * initialization).
 *
 * Forwards to rte_get_timer_cycles(); the value is in ticks, whose rate is
 * reported by nvme_get_tsc_hz().
 */
#define nvme_get_tsc() rte_get_timer_cycles()
/**
 * Get the tick rate of nvme_get_tsc(), in ticks per second.
 *
 * Forwards to rte_get_timer_hz().
 */
#define nvme_get_tsc_hz() rte_get_timer_hz()
/**
 * 32-bit PCI configuration space accessors.
 *
 * Both macros forward their three arguments, unchanged and in order, to
 * spdk_pci_device_cfg_read32() / spdk_pci_device_cfg_write32().
 * NOTE(review): presumably the arguments are the device handle, the value
 * (or a pointer that receives it for reads) and the byte offset into
 * configuration space - confirm against spdk/pci.h.
 */
#define nvme_pcicfg_read32(dev, val, off) \
	spdk_pci_device_cfg_read32(dev, val, off)
#define nvme_pcicfg_write32(dev, val, off) \
	spdk_pci_device_cfg_write32(dev, val, off)
/**
 * Bundles the caller's enumeration callback with its context pointer so that
 * both can be carried through the PCI enumeration backends in this header.
 */
struct nvme_pci_enum_ctx {
/* Callback supplied to nvme_pci_enumerate(); invoked once per matching device. */
int (*user_enum_cb)(void *enum_ctx, struct spdk_pci_device *pci_dev);
/* Opaque pointer passed back to user_enum_cb as its first argument. */
void *user_enum_ctx;
};
#ifdef USE_PCIACCESS
/**
 * Enumeration trampoline for the libpciaccess build.
 *
 * Passes NVMe-class devices to the user's callback and silently skips
 * everything else.
 *
 * \param enum_ctx  pointer to the struct nvme_pci_enum_ctx set up by
 *                  nvme_pci_enumerate().
 * \param pci_dev   device currently being visited.
 *
 * \return 0 for non-NVMe devices, otherwise the user callback's return value.
 */
static int
nvme_pci_enum_cb(void *enum_ctx, struct spdk_pci_device *pci_dev)
{
	struct nvme_pci_enum_ctx *wrapper = enum_ctx;

	if (spdk_pci_device_get_class(pci_dev) == SPDK_PCI_CLASS_NVME) {
		return wrapper->user_enum_cb(wrapper->user_enum_ctx, pci_dev);
	}

	/* Not an NVMe device: nothing to report. */
	return 0;
}
static inline int
nvme_pci_enumerate(int (*enum_cb)(void *enum_ctx, struct spdk_pci_device *pci_dev), void *enum_ctx)
{
struct nvme_pci_enum_ctx nvme_enum_ctx;
nvme_enum_ctx.user_enum_cb = enum_cb;
nvme_enum_ctx.user_enum_ctx = enum_ctx;
return spdk_pci_enumerate(nvme_pci_enum_cb, &nvme_enum_ctx);
}
/**
 * Map a whole BAR of a libpciaccess device into the process.
 *
 * \param devhandle    the struct pci_device to operate on.
 * \param bar          index into the device's regions[] array (not range
 *                     checked here).
 * \param read_only    non-zero maps without the writable flag.
 * \param mapped_addr  output; receives the mapping address.
 *
 * \return the value returned by pci_device_map_range().
 */
static inline int
nvme_pcicfg_map_bar(void *devhandle, uint32_t bar, uint32_t read_only, void **mapped_addr)
{
	struct pci_device *pdev = devhandle;
	uint32_t map_flags = 0;

	if (!read_only) {
		map_flags = PCI_DEV_MAP_FLAG_WRITABLE;
	}

	return pci_device_map_range(pdev,
				    pdev->regions[bar].base_addr,
				    pdev->regions[bar].size,
				    map_flags,
				    mapped_addr);
}
/**
 * Map a whole BAR of a libpciaccess device as writable and write-combining.
 *
 * \param devhandle    the struct pci_device to operate on.
 * \param bar          index into the device's regions[] array (not range
 *                     checked here).
 * \param mapped_addr  output; receives the mapping address.
 *
 * \return the value returned by pci_device_map_range().
 */
static inline int
nvme_pcicfg_map_bar_write_combine(void *devhandle, uint32_t bar, void **mapped_addr)
{
	struct pci_device *pdev = devhandle;
	uint32_t map_flags = PCI_DEV_MAP_FLAG_WRITABLE;

	map_flags |= PCI_DEV_MAP_FLAG_WRITE_COMBINE;

	return pci_device_map_range(pdev,
				    pdev->regions[bar].base_addr,
				    pdev->regions[bar].size,
				    map_flags,
				    mapped_addr);
}
/**
 * Undo a mapping created by nvme_pcicfg_map_bar() or
 * nvme_pcicfg_map_bar_write_combine() (libpciaccess build).
 *
 * \param devhandle  the struct pci_device to operate on.
 * \param bar        index into regions[]; supplies the length to unmap.
 * \param addr       address previously returned by the map call.
 *
 * \return the value returned by pci_device_unmap_range().
 */
static inline int
nvme_pcicfg_unmap_bar(void *devhandle, uint32_t bar, void *addr)
{
	struct pci_device *pdev = devhandle;
	int status;

	status = pci_device_unmap_range(pdev, addr, pdev->regions[bar].size);

	return status;
}
/**
 * Report the base address and size recorded for a BAR (libpciaccess build).
 *
 * \param devhandle  the struct pci_device to query.
 * \param bar        index into the device's regions[] array.
 * \param addr       output; receives regions[bar].base_addr.
 * \param size       output; receives regions[bar].size.
 */
static inline void
nvme_pcicfg_get_bar_addr_len(void *devhandle, uint32_t bar, uint64_t *addr, uint64_t *size)
{
	struct pci_device *pdev = devhandle;

	/* Both fields are widened to 64 bits for the caller. */
	*addr = (uint64_t)pdev->regions[bar].base_addr;
	*size = (uint64_t)pdev->regions[bar].size;
}
#else /* !USE_PCIACCESS */
/**
 * Return the address of a BAR for a DPDK-managed device.
 *
 * No mapping is performed here: the address already stored in
 * mem_resource[bar].addr is handed back as-is.
 *
 * \param devhandle    the struct rte_pci_device to query.
 * \param bar          index into the device's mem_resource[] array.
 * \param read_only    ignored by this implementation.
 * \param mapped_addr  output; receives mem_resource[bar].addr.
 *
 * \return always 0.
 */
static inline int
nvme_pcicfg_map_bar(void *devhandle, uint32_t bar, uint32_t read_only, void **mapped_addr)
{
	struct rte_pci_device *rdev = devhandle;

	(void)read_only;

	*mapped_addr = rdev->mem_resource[bar].addr;

	return 0;
}
/**
 * Write-combining BAR mapping is not available in the DPDK build.
 *
 * Logs a message through nvme_printf() and fails; none of the arguments are
 * used and *mapped_addr is left untouched.
 *
 * \return always -1.
 */
static inline int
nvme_pcicfg_map_bar_write_combine(void *devhandle, uint32_t bar, void **mapped_addr)
{
	(void)devhandle;
	(void)bar;
	(void)mapped_addr;

	nvme_printf(NULL, "DPDK cannot support write combine now\n");

	return -1;
}
/**
 * Counterpart of nvme_pcicfg_map_bar() for the DPDK build.
 *
 * The map call in this build does not create a mapping of its own, so there
 * is nothing to release; every argument is ignored.
 *
 * \return always 0.
 */
static inline int
nvme_pcicfg_unmap_bar(void *devhandle, uint32_t bar, void *addr)
{
	(void)devhandle;
	(void)bar;
	(void)addr;

	return 0;
}
/**
 * Report the physical address and length recorded for a BAR (DPDK build).
 *
 * \param devhandle  the struct rte_pci_device to query.
 * \param bar        index into the device's mem_resource[] array.
 * \param addr       output; receives mem_resource[bar].phys_addr.
 * \param size       output; receives mem_resource[bar].len.
 */
static inline void
nvme_pcicfg_get_bar_addr_len(void *devhandle, uint32_t bar, uint64_t *addr, uint64_t *size)
{
	struct rte_pci_device *rdev = devhandle;

	/* Both fields are widened to 64 bits for the caller. */
	*addr = (uint64_t)rdev->mem_resource[bar].phys_addr;
	*size = (uint64_t)rdev->mem_resource[bar].len;
}
/*
 * PCI ID table handed to DPDK through nvme_rte_driver.id_table.
 *
 * Only a single vendor/device pair (0x8086:0x0953) is listed, followed by a
 * zeroed sentinel entry that terminates the table.
 * NOTE(review): 0x8086 is the Intel vendor ID; 0x0953 is presumably an Intel
 * NVMe SSD device ID - other NVMe devices will not match until this table is
 * extended.
 *
 * TODO: once DPDK supports matching class code instead of device ID, switch to SPDK_PCI_CLASS_NVME
 */
static struct rte_pci_id nvme_pci_driver_id[] = {
{RTE_PCI_DEVICE(0x8086, 0x0953)},
{ .vendor_id = 0, /* sentinel */ },
};
/*
 * Holds the user's enumeration callback and context for the DPDK build.
 * nvme_pci_enumerate() fills it in before probing and nvme_driver_init()
 * reads it when DPDK reports a device.
 *
 * TODO: eliminate this global if possible (does rte_pci_driver have a context field for this?)
 *
 * This should be protected by the NVMe driver lock, since nvme_probe() holds the lock
 * while calling nvme_pci_enumerate(), but we shouldn't have to depend on that.
 *
 * Because the object is static and defined in a header, every translation
 * unit that includes this header gets its own copy.
 */
static struct nvme_pci_enum_ctx g_nvme_pci_enum_ctx;
static int
nvme_driver_init(struct rte_pci_driver *dr, struct rte_pci_device *rte_dev)
{
/*
* These are actually the same type internally.
* TODO: refactor this so it's inside pci.c
*/
struct spdk_pci_device *pci_dev = (struct spdk_pci_device *)rte_dev;
/*
* TODO: This is a workaround for an issue where the device is not ready after VFIO reset.
* Figure out what is actually going wrong and remove this sleep.
*/
usleep(500 * 1000);
return g_nvme_pci_enum_ctx.user_enum_cb(g_nvme_pci_enum_ctx.user_enum_ctx, pci_dev);
}
/*
 * DPDK PCI driver descriptor registered (and unregistered again) by
 * nvme_pci_enumerate(). Devices matching nvme_pci_driver_id are delivered to
 * nvme_driver_init().
 * NOTE(review): RTE_PCI_DRV_NEED_MAPPING presumably asks DPDK to map the
 * device BARs before devinit runs, which the DPDK nvme_pcicfg_map_bar()
 * appears to rely on - confirm against the DPDK version in use.
 */
static struct rte_pci_driver nvme_rte_driver = {
.name = "nvme_driver",
.devinit = nvme_driver_init,
.id_table = nvme_pci_driver_id,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
};
static inline int
nvme_pci_enumerate(int (*enum_cb)(void *enum_ctx, struct spdk_pci_device *pci_dev), void *enum_ctx)
{
int rc;
g_nvme_pci_enum_ctx.user_enum_cb = enum_cb;
g_nvme_pci_enum_ctx.user_enum_ctx = enum_ctx;
rte_eal_pci_register(&nvme_rte_driver);
rc = rte_eal_pci_probe();
rte_eal_pci_unregister(&nvme_rte_driver);
return rc;
}
#endif /* !USE_PCIACCESS */
/*
 * Mutex abstraction used by the driver, implemented directly on top of
 * POSIX threads.
 *
 * NOTE(review): no #include <pthread.h> is visible in this header, so it
 * appears to rely on another header pulling it in - confirm.
 */
typedef pthread_mutex_t nvme_mutex_t;

/* Initialize / tear down a mutex with default attributes. */
#define nvme_mutex_init(mtx) pthread_mutex_init((mtx), NULL)
#define nvme_mutex_destroy(mtx) pthread_mutex_destroy((mtx))

/* Lock and unlock are plain aliases of the pthread calls. */
#define nvme_mutex_lock pthread_mutex_lock
#define nvme_mutex_unlock pthread_mutex_unlock

/* Static initializer for an nvme_mutex_t. */
#define NVME_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
/**
 * Initialize a mutex that the owning thread may lock recursively.
 *
 * \param mtx  mutex to initialize.
 *
 * \return 0 on success, -1 if any of the underlying pthread calls failed.
 */
static inline int
nvme_mutex_init_recursive(nvme_mutex_t *mtx)
{
	pthread_mutexattr_t mattr;
	int status = -1;

	if (pthread_mutexattr_init(&mattr) != 0) {
		return -1;
	}

	/* The mutex is only created if the recursive type could be selected. */
	if (pthread_mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE) == 0 &&
	    pthread_mutex_init(mtx, &mattr) == 0) {
		status = 0;
	}

	/* The attribute object is no longer needed, whatever the outcome. */
	pthread_mutexattr_destroy(&mattr);

	return status;
}
#endif /* __NVME_IMPL_H__ */