3272320c73
The previous method for registering I/O queues did not allow the user to specify queue priority for weighted round robin arbitration, and it limited the application to one queue per controller per thread. Change the API to require explicit allocation of each queue for each controller using the new function spdk_nvme_ctrlr_alloc_io_qpair(). Each function that submits a command on an I/O queue now takes an explicit qpair parameter rather than implicitly using the thread-local queue. This also allows the application to allocate different numbers of threads per controller; previously, the number of queues was capped at the smallest value supported by any attached controller. Weighted round robin arbitration is not supported yet; additional changes to the controller startup process are required to enable alternate arbitration methods. Change-Id: Ia33be1050a6953bc5a3cca9284aefcd95b01116e Signed-off-by: Daniel Verkamp <daniel.verkamp@intel.com>
282 lines
7.8 KiB
C
282 lines
7.8 KiB
C
/*-
|
|
* BSD LICENSE
|
|
*
|
|
* Copyright (c) Intel Corporation.
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
* * Neither the name of Intel Corporation nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "nvme_internal.h"
|
|
|
|
/** \file
|
|
*
|
|
*/
|
|
|
|
struct nvme_driver g_nvme_driver = {
|
|
.lock = NVME_MUTEX_INITIALIZER,
|
|
.init_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_driver.init_ctrlrs),
|
|
.attached_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_driver.attached_ctrlrs),
|
|
};
|
|
|
|
int32_t spdk_nvme_retry_count;
|
|
|
|
|
|
/**
|
|
* \page nvme_initialization NVMe Initialization
|
|
|
|
\msc
|
|
|
|
app [label="Application"], nvme [label="NVMe Driver"];
|
|
app=>nvme [label="nvme_probe()"];
|
|
app<<nvme [label="probe_cb(pci_dev)"];
|
|
nvme=>nvme [label="nvme_attach(devhandle)"];
|
|
nvme=>nvme [label="nvme_ctrlr_start(nvme_controller ptr)"];
|
|
nvme=>nvme [label="identify controller"];
|
|
nvme=>nvme [label="create queue pairs"];
|
|
nvme=>nvme [label="identify namespace(s)"];
|
|
app<<nvme [label="attach_cb(pci_dev, nvme_controller)"];
|
|
app=>app [label="create block devices based on controller's namespaces"];
|
|
|
|
\endmsc
|
|
|
|
*/
|
|
|
|
static struct spdk_nvme_ctrlr *
|
|
nvme_attach(void *devhandle)
|
|
{
|
|
struct spdk_nvme_ctrlr *ctrlr;
|
|
int status;
|
|
uint64_t phys_addr = 0;
|
|
|
|
ctrlr = nvme_malloc("nvme_ctrlr", sizeof(struct spdk_nvme_ctrlr),
|
|
64, &phys_addr);
|
|
if (ctrlr == NULL) {
|
|
nvme_printf(NULL, "could not allocate ctrlr\n");
|
|
return NULL;
|
|
}
|
|
|
|
status = nvme_ctrlr_construct(ctrlr, devhandle);
|
|
if (status != 0) {
|
|
nvme_free(ctrlr);
|
|
return NULL;
|
|
}
|
|
|
|
return ctrlr;
|
|
}
|
|
|
|
int
|
|
spdk_nvme_detach(struct spdk_nvme_ctrlr *ctrlr)
|
|
{
|
|
struct nvme_driver *driver = &g_nvme_driver;
|
|
|
|
nvme_mutex_lock(&driver->lock);
|
|
|
|
nvme_ctrlr_destruct(ctrlr);
|
|
TAILQ_REMOVE(&g_nvme_driver.attached_ctrlrs, ctrlr, tailq);
|
|
nvme_free(ctrlr);
|
|
|
|
nvme_mutex_unlock(&driver->lock);
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl)
|
|
{
|
|
struct nvme_completion_poll_status *status = arg;
|
|
|
|
/*
|
|
* Copy status into the argument passed by the caller, so that
|
|
* the caller can check the status to determine if the
|
|
* the request passed or failed.
|
|
*/
|
|
memcpy(&status->cpl, cpl, sizeof(*cpl));
|
|
status->done = true;
|
|
}
|
|
|
|
size_t
|
|
spdk_nvme_request_size(void)
|
|
{
|
|
return sizeof(struct nvme_request);
|
|
}
|
|
|
|
struct nvme_request *
|
|
nvme_allocate_request(const struct nvme_payload *payload, uint32_t payload_size,
|
|
spdk_nvme_cmd_cb cb_fn, void *cb_arg)
|
|
{
|
|
struct nvme_request *req = NULL;
|
|
|
|
nvme_alloc_request(&req);
|
|
|
|
if (req == NULL) {
|
|
return req;
|
|
}
|
|
|
|
/*
|
|
* Only memset up to (but not including) the children
|
|
* TAILQ_ENTRY. children, and following members, are
|
|
* only used as part of I/O splitting so we avoid
|
|
* memsetting them until it is actually needed.
|
|
* They will be initialized in nvme_request_add_child()
|
|
* if the request is split.
|
|
*/
|
|
memset(req, 0, offsetof(struct nvme_request, children));
|
|
req->cb_fn = cb_fn;
|
|
req->cb_arg = cb_arg;
|
|
req->payload = *payload;
|
|
req->payload_size = payload_size;
|
|
|
|
return req;
|
|
}
|
|
|
|
struct nvme_request *
|
|
nvme_allocate_request_contig(void *buffer, uint32_t payload_size, spdk_nvme_cmd_cb cb_fn,
|
|
void *cb_arg)
|
|
{
|
|
struct nvme_payload payload;
|
|
|
|
payload.type = NVME_PAYLOAD_TYPE_CONTIG;
|
|
payload.u.contig = buffer;
|
|
|
|
return nvme_allocate_request(&payload, payload_size, cb_fn, cb_arg);
|
|
}
|
|
|
|
struct nvme_request *
|
|
nvme_allocate_request_null(spdk_nvme_cmd_cb cb_fn, void *cb_arg)
|
|
{
|
|
return nvme_allocate_request_contig(NULL, 0, cb_fn, cb_arg);
|
|
}
|
|
|
|
void
|
|
nvme_free_request(struct nvme_request *req)
|
|
{
|
|
nvme_assert(req != NULL, ("nvme_free_request(NULL)\n"));
|
|
nvme_dealloc_request(req);
|
|
}
|
|
|
|
struct nvme_enum_ctx {
|
|
spdk_nvme_probe_cb probe_cb;
|
|
void *cb_ctx;
|
|
};
|
|
|
|
/* This function must only be called while holding g_nvme_driver.lock */
|
|
static int
|
|
nvme_enum_cb(void *ctx, struct spdk_pci_device *pci_dev)
|
|
{
|
|
struct nvme_enum_ctx *enum_ctx = ctx;
|
|
struct spdk_nvme_ctrlr *ctrlr;
|
|
|
|
/* Verify that this controller is not already attached */
|
|
TAILQ_FOREACH(ctrlr, &g_nvme_driver.attached_ctrlrs, tailq) {
|
|
/* NOTE: This assumes that the PCI abstraction layer will use the same device handle
|
|
* across enumerations; we could compare by BDF instead if this is not true.
|
|
*/
|
|
if (pci_dev == ctrlr->devhandle) {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
if (enum_ctx->probe_cb(enum_ctx->cb_ctx, pci_dev)) {
|
|
ctrlr = nvme_attach(pci_dev);
|
|
if (ctrlr == NULL) {
|
|
nvme_printf(NULL, "nvme_attach() failed\n");
|
|
return -1;
|
|
}
|
|
|
|
TAILQ_INSERT_TAIL(&g_nvme_driver.init_ctrlrs, ctrlr, tailq);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
spdk_nvme_probe(void *cb_ctx, spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb attach_cb)
|
|
{
|
|
int rc, start_rc;
|
|
struct nvme_enum_ctx enum_ctx;
|
|
struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;
|
|
|
|
nvme_mutex_lock(&g_nvme_driver.lock);
|
|
|
|
enum_ctx.probe_cb = probe_cb;
|
|
enum_ctx.cb_ctx = cb_ctx;
|
|
|
|
rc = nvme_pci_enumerate(nvme_enum_cb, &enum_ctx);
|
|
/*
|
|
* Keep going even if one or more nvme_attach() calls failed,
|
|
* but maintain the value of rc to signal errors when we return.
|
|
*/
|
|
|
|
/* Initialize all new controllers in the init_ctrlrs list in parallel. */
|
|
while (!TAILQ_EMPTY(&g_nvme_driver.init_ctrlrs)) {
|
|
TAILQ_FOREACH_SAFE(ctrlr, &g_nvme_driver.init_ctrlrs, tailq, ctrlr_tmp) {
|
|
/* Drop the driver lock while calling nvme_ctrlr_process_init()
|
|
* since it needs to acquire the driver lock internally when calling
|
|
* nvme_ctrlr_start().
|
|
*
|
|
* TODO: Rethink the locking - maybe reset should take the lock so that start() and
|
|
* the functions it calls (in particular nvme_ctrlr_set_num_qpairs())
|
|
* can assume it is held.
|
|
*/
|
|
nvme_mutex_unlock(&g_nvme_driver.lock);
|
|
start_rc = nvme_ctrlr_process_init(ctrlr);
|
|
nvme_mutex_lock(&g_nvme_driver.lock);
|
|
|
|
if (start_rc) {
|
|
/* Controller failed to initialize. */
|
|
TAILQ_REMOVE(&g_nvme_driver.init_ctrlrs, ctrlr, tailq);
|
|
nvme_ctrlr_destruct(ctrlr);
|
|
nvme_free(ctrlr);
|
|
rc = -1;
|
|
break;
|
|
}
|
|
|
|
if (ctrlr->state == NVME_CTRLR_STATE_READY) {
|
|
/*
|
|
* Controller has been initialized.
|
|
* Move it to the attached_ctrlrs list.
|
|
*/
|
|
TAILQ_REMOVE(&g_nvme_driver.init_ctrlrs, ctrlr, tailq);
|
|
TAILQ_INSERT_TAIL(&g_nvme_driver.attached_ctrlrs, ctrlr, tailq);
|
|
|
|
/*
|
|
* Unlock while calling attach_cb() so the user can call other functions
|
|
* that may take the driver lock, like nvme_detach().
|
|
*/
|
|
nvme_mutex_unlock(&g_nvme_driver.lock);
|
|
attach_cb(cb_ctx, ctrlr->devhandle, ctrlr);
|
|
nvme_mutex_lock(&g_nvme_driver.lock);
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
nvme_mutex_unlock(&g_nvme_driver.lock);
|
|
return rc;
|
|
}
|