2016-07-20 18:16:23 +00:00
|
|
|
/*-
|
|
|
|
* BSD LICENSE
|
|
|
|
*
|
|
|
|
* Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
|
|
|
|
* Copyright (c) Intel Corporation.
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
*
|
|
|
|
* * Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in
|
|
|
|
* the documentation and/or other materials provided with the
|
|
|
|
* distribution.
|
|
|
|
* * Neither the name of Intel Corporation nor the names of its
|
|
|
|
* contributors may be used to endorse or promote products derived
|
|
|
|
* from this software without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
2017-05-02 18:18:25 +00:00
|
|
|
#include "spdk/stdinc.h"
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-05-02 18:18:25 +00:00
|
|
|
#include "spdk/bdev.h"
|
2017-12-28 21:49:46 +00:00
|
|
|
#include "spdk/conf.h"
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-03-27 19:59:40 +00:00
|
|
|
#include "spdk/env.h"
|
2017-10-25 13:58:02 +00:00
|
|
|
#include "spdk/event.h"
|
2017-04-04 21:01:54 +00:00
|
|
|
#include "spdk/io_channel.h"
|
2017-05-09 21:32:49 +00:00
|
|
|
#include "spdk/likely.h"
|
2016-07-20 18:16:23 +00:00
|
|
|
#include "spdk/queue.h"
|
2017-01-18 21:43:15 +00:00
|
|
|
#include "spdk/nvme_spec.h"
|
2017-01-18 22:15:35 +00:00
|
|
|
#include "spdk/scsi_spec.h"
|
2017-08-29 05:24:52 +00:00
|
|
|
#include "spdk/util.h"
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-01-04 22:30:04 +00:00
|
|
|
#include "spdk_internal/bdev.h"
|
2016-11-07 22:10:28 +00:00
|
|
|
#include "spdk_internal/log.h"
|
2017-05-30 08:45:46 +00:00
|
|
|
#include "spdk/string.h"
|
|
|
|
|
|
|
|
#ifdef SPDK_CONFIG_VTUNE
|
|
|
|
#include "ittnotify.h"
|
2017-06-15 16:59:02 +00:00
|
|
|
#include "ittnotify_types.h"
|
|
|
|
int __itt_init_ittlib(const char *, __itt_group_id);
|
2017-05-30 08:45:46 +00:00
|
|
|
#endif
|
2016-11-07 22:10:28 +00:00
|
|
|
|
2017-12-28 03:11:55 +00:00
|
|
|
/* Number of spdk_bdev_io objects held by the global bdev_io mempool. */
#define SPDK_BDEV_IO_POOL_SIZE			(64 * 1024)

/* NOTE(review): presumably the cap on the per-thread bdev_io cache - its use is outside this view. */
#define SPDK_BDEV_IO_CACHE_SIZE			256

/* Element counts of the small and large data buffer mempools. */
#define BUF_SMALL_POOL_SIZE			8192
#define BUF_LARGE_POOL_SIZE			1024

/* NOTE(review): looks related to nomem_threshold in the module channel - confirm at the use site. */
#define NOMEM_THRESHOLD_COUNT			8

/* Passed as both arguments that size the zero buffer allocation (length and, presumably, alignment). */
#define ZERO_BUFFER_SIZE			0x100000

/* QoS rate limiting constants. NOTE(review): semantics inferred from the names - confirm. */
#define SPDK_BDEV_QOS_TIMESLICE_IN_USEC		1000
#define SPDK_BDEV_SEC_TO_USEC			1000000ULL
#define SPDK_BDEV_QOS_MIN_IO_PER_TIMESLICE	1
#define SPDK_BDEV_QOS_MIN_IOS_PER_SEC		10000
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-09-12 16:37:52 +00:00
|
|
|
/* Doubly-linked (TAILQ) and singly-linked (STAILQ) list heads of spdk_bdev_io. */
typedef TAILQ_HEAD(, spdk_bdev_io)	bdev_io_tailq_t;
typedef STAILQ_HEAD(, spdk_bdev_io)	bdev_io_stailq_t;
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-05-09 21:09:28 +00:00
|
|
|
/*
 * Process-wide state of the bdev layer. The only instance in this file is
 * g_bdev_mgr, whose address also serves as the io_device for the management
 * channels.
 */
struct spdk_bdev_mgr {
	/* Pool that every spdk_bdev_io (plus module context) is taken from. */
	struct spdk_mempool *bdev_io_pool;

	/* Data buffer pools handed out by spdk_bdev_io_get_buf(). */
	struct spdk_mempool *buf_small_pool;
	struct spdk_mempool *buf_large_pool;

	/* Zero-filled DMA buffer of ZERO_BUFFER_SIZE bytes. */
	void *zero_buffer;

	/* All registered bdev modules. */
	TAILQ_HEAD(, spdk_bdev_module) bdev_modules;

	/* All registered bdevs. */
	TAILQ_HEAD(, spdk_bdev) bdevs;

	/* Set once the user's init callback is about to be invoked. */
	bool init_complete;
	/* Set once every module's module_init() has been called (or one failed). */
	bool module_init_complete;

#ifdef SPDK_CONFIG_VTUNE
	__itt_domain	*domain;
#endif
};
|
|
|
|
|
|
|
|
static struct spdk_bdev_mgr g_bdev_mgr = {
|
|
|
|
.bdev_modules = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdev_modules),
|
|
|
|
.bdevs = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdevs),
|
2017-07-10 23:36:35 +00:00
|
|
|
.init_complete = false,
|
|
|
|
.module_init_complete = false,
|
2017-05-09 21:09:28 +00:00
|
|
|
};
|
2016-08-01 21:31:02 +00:00
|
|
|
|
2017-10-25 13:58:02 +00:00
|
|
|
static spdk_bdev_init_cb g_init_cb_fn = NULL;
|
|
|
|
static void *g_init_cb_arg = NULL;
|
|
|
|
|
|
|
|
static spdk_bdev_fini_cb g_fini_cb_fn = NULL;
|
|
|
|
static void *g_fini_cb_arg = NULL;
|
2017-11-21 17:45:27 +00:00
|
|
|
static struct spdk_thread *g_fini_thread = NULL;
|
2017-05-26 04:58:04 +00:00
|
|
|
|
2017-06-15 19:01:53 +00:00
|
|
|
|
2017-05-09 22:07:56 +00:00
|
|
|
struct spdk_bdev_mgmt_channel {
|
2018-01-05 21:55:38 +00:00
|
|
|
bdev_io_stailq_t need_buf_small;
|
|
|
|
bdev_io_stailq_t need_buf_large;
|
2017-12-20 15:20:23 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Each thread keeps a cache of bdev_io - this allows
|
|
|
|
* bdev threads which are *not* DPDK threads to still
|
|
|
|
* benefit from a per-thread bdev_io cache. Without
|
|
|
|
* this, non-DPDK threads fetching from the mempool
|
|
|
|
* incur a cmpxchg on get and put.
|
|
|
|
*/
|
2018-01-05 21:55:38 +00:00
|
|
|
bdev_io_stailq_t per_thread_cache;
|
2017-12-20 15:20:23 +00:00
|
|
|
uint32_t per_thread_cache_count;
|
2018-01-14 11:48:01 +00:00
|
|
|
|
|
|
|
TAILQ_HEAD(, spdk_bdev_module_channel) module_channels;
|
2017-05-09 22:07:56 +00:00
|
|
|
};
|
|
|
|
|
2017-06-29 18:23:50 +00:00
|
|
|
struct spdk_bdev_desc {
|
|
|
|
struct spdk_bdev *bdev;
|
|
|
|
spdk_bdev_remove_cb_t remove_cb;
|
|
|
|
void *remove_ctx;
|
|
|
|
bool write;
|
|
|
|
TAILQ_ENTRY(spdk_bdev_desc) link;
|
|
|
|
};
|
|
|
|
|
2017-09-08 18:44:50 +00:00
|
|
|
#define BDEV_CH_RESET_IN_PROGRESS (1 << 0)
|
2017-12-28 03:11:55 +00:00
|
|
|
#define BDEV_CH_QOS_ENABLED (1 << 1)
|
2017-09-08 18:44:50 +00:00
|
|
|
|
2017-04-04 21:01:54 +00:00
|
|
|
struct spdk_bdev_channel {
|
|
|
|
struct spdk_bdev *bdev;
|
|
|
|
|
|
|
|
/* The channel for the underlying device */
|
|
|
|
struct spdk_io_channel *channel;
|
2017-05-09 22:07:56 +00:00
|
|
|
|
|
|
|
/* Channel for the bdev manager */
|
2017-12-28 02:18:12 +00:00
|
|
|
struct spdk_io_channel *mgmt_channel;
|
2017-04-06 21:40:29 +00:00
|
|
|
|
|
|
|
struct spdk_bdev_io_stat stat;
|
2017-05-30 08:45:46 +00:00
|
|
|
|
2018-03-16 12:20:55 +00:00
|
|
|
/*
|
|
|
|
* Count of I/O submitted through this channel and waiting for completion.
|
|
|
|
* Incremented before submit_request() is called on an spdk_bdev_io.
|
|
|
|
*/
|
|
|
|
uint64_t io_outstanding;
|
|
|
|
|
2018-01-14 11:48:01 +00:00
|
|
|
bdev_io_tailq_t queued_resets;
|
|
|
|
|
|
|
|
uint32_t flags;
|
|
|
|
|
2017-12-28 02:18:12 +00:00
|
|
|
/*
|
|
|
|
* Rate limiting on this channel.
|
|
|
|
* Queue of IO awaiting issue because of a QoS rate limiting happened
|
|
|
|
* on this channel.
|
|
|
|
*/
|
|
|
|
bdev_io_tailq_t qos_io;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Rate limiting on this channel.
|
|
|
|
* Maximum allowed IOs to be issued in one timeslice (e.g., 1ms) and
|
|
|
|
* only valid for the master channel which manages the outstanding IOs.
|
|
|
|
*/
|
|
|
|
uint64_t qos_max_ios_per_timeslice;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Rate limiting on this channel.
|
|
|
|
* Submitted IO in one timeslice (e.g., 1ms)
|
|
|
|
*/
|
|
|
|
uint64_t io_submitted_this_timeslice;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Rate limiting on this channel.
|
|
|
|
* Periodic running QoS poller in millisecond.
|
|
|
|
*/
|
|
|
|
struct spdk_poller *qos_poller;
|
|
|
|
|
2018-01-14 11:48:01 +00:00
|
|
|
/* Per-device channel */
|
|
|
|
struct spdk_bdev_module_channel *module_ch;
|
|
|
|
|
|
|
|
#ifdef SPDK_CONFIG_VTUNE
|
|
|
|
uint64_t start_tsc;
|
|
|
|
uint64_t interval_tsc;
|
|
|
|
__itt_string_handle *handle;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
};
|
|
|
|
|
2018-02-19 22:21:15 +00:00
|
|
|
/*
 * Convert between a bdev pointer and the io_device handle used for it.
 * The handle is simply the bdev's address advanced by one byte, and
 * __bdev_from_io_dev() undoes that offset.
 * NOTE(review): presumably the offset keeps this handle distinct from the
 * bdev address itself - confirm against the registration code.
 *
 * The arguments are parenthesized so that an expression argument (for
 * example "p + 1") is converted as a whole rather than having the cast bind
 * to its first operand only.
 */
#define __bdev_to_io_dev(bdev)		(((char *)(bdev)) + 1)
#define __bdev_from_io_dev(io_dev)	((struct spdk_bdev *)(((char *)(io_dev)) - 1))
|
|
|
|
|
2018-01-14 11:48:01 +00:00
|
|
|
/*
|
|
|
|
* Per-module (or per-io_device) channel. Multiple bdevs built on the same io_device
|
|
|
|
* will queue here their IO that awaits retry. It makes it posible to retry sending
|
|
|
|
* IO to one bdev after IO from other bdev completes.
|
|
|
|
*/
|
|
|
|
struct spdk_bdev_module_channel {
|
2017-05-22 17:49:12 +00:00
|
|
|
/*
|
|
|
|
* Count of I/O submitted to bdev module and waiting for completion.
|
|
|
|
* Incremented before submit_request() is called on an spdk_bdev_io.
|
|
|
|
*/
|
|
|
|
uint64_t io_outstanding;
|
|
|
|
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O oustanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
/*
|
|
|
|
* Queue of IO awaiting retry because of a previous NOMEM status returned
|
|
|
|
* on this channel.
|
|
|
|
*/
|
|
|
|
bdev_io_tailq_t nomem_io;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Threshold which io_outstanding must drop to before retrying nomem_io.
|
|
|
|
*/
|
|
|
|
uint64_t nomem_threshold;
|
|
|
|
|
2018-01-14 11:48:01 +00:00
|
|
|
/* I/O channel allocated by a bdev module */
|
|
|
|
struct spdk_io_channel *module_ch;
|
2017-09-08 18:44:50 +00:00
|
|
|
|
2018-01-14 11:48:01 +00:00
|
|
|
uint32_t ref;
|
2017-05-30 08:45:46 +00:00
|
|
|
|
2018-01-14 11:48:01 +00:00
|
|
|
TAILQ_ENTRY(spdk_bdev_module_channel) link;
|
2017-04-04 21:01:54 +00:00
|
|
|
};
|
|
|
|
|
2017-07-28 22:34:24 +00:00
|
|
|
/* Forward declaration; the definition appears later in this file. */
static void
spdk_bdev_write_zeroes_split(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
|
|
|
|
|
2017-05-09 21:01:12 +00:00
|
|
|
struct spdk_bdev *
|
|
|
|
spdk_bdev_first(void)
|
2016-08-01 21:31:02 +00:00
|
|
|
{
|
2016-08-02 17:07:34 +00:00
|
|
|
struct spdk_bdev *bdev;
|
|
|
|
|
2017-05-09 21:09:28 +00:00
|
|
|
bdev = TAILQ_FIRST(&g_bdev_mgr.bdevs);
|
2016-08-02 17:07:34 +00:00
|
|
|
if (bdev) {
|
2017-08-30 18:06:33 +00:00
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Starting bdev iteration at %s\n", bdev->name);
|
2016-08-02 17:07:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return bdev;
|
2016-08-01 21:31:02 +00:00
|
|
|
}
|
|
|
|
|
2017-05-09 21:01:12 +00:00
|
|
|
struct spdk_bdev *
|
|
|
|
spdk_bdev_next(struct spdk_bdev *prev)
|
2016-08-01 21:31:02 +00:00
|
|
|
{
|
2016-08-02 17:07:34 +00:00
|
|
|
struct spdk_bdev *bdev;
|
|
|
|
|
|
|
|
bdev = TAILQ_NEXT(prev, link);
|
|
|
|
if (bdev) {
|
2017-08-30 18:06:33 +00:00
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Continuing bdev iteration at %s\n", bdev->name);
|
2016-08-02 17:07:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return bdev;
|
2016-08-01 21:31:02 +00:00
|
|
|
}
|
|
|
|
|
2017-06-29 20:16:26 +00:00
|
|
|
static struct spdk_bdev *
|
|
|
|
_bdev_next_leaf(struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
while (bdev != NULL) {
|
2018-03-06 17:42:54 +00:00
|
|
|
if (bdev->claim_module == NULL) {
|
2017-06-29 20:16:26 +00:00
|
|
|
return bdev;
|
|
|
|
} else {
|
|
|
|
bdev = TAILQ_NEXT(bdev, link);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return bdev;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct spdk_bdev *
|
|
|
|
spdk_bdev_first_leaf(void)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev;
|
|
|
|
|
|
|
|
bdev = _bdev_next_leaf(TAILQ_FIRST(&g_bdev_mgr.bdevs));
|
|
|
|
|
|
|
|
if (bdev) {
|
2017-08-30 18:06:33 +00:00
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Starting bdev iteration at %s\n", bdev->name);
|
2017-06-29 20:16:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return bdev;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct spdk_bdev *
|
|
|
|
spdk_bdev_next_leaf(struct spdk_bdev *prev)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev;
|
|
|
|
|
|
|
|
bdev = _bdev_next_leaf(TAILQ_NEXT(prev, link));
|
|
|
|
|
|
|
|
if (bdev) {
|
2017-08-30 18:06:33 +00:00
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Continuing bdev iteration at %s\n", bdev->name);
|
2017-06-29 20:16:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return bdev;
|
|
|
|
}
|
|
|
|
|
2017-05-09 21:01:12 +00:00
|
|
|
struct spdk_bdev *
|
|
|
|
spdk_bdev_get_by_name(const char *bdev_name)
|
2016-08-01 21:31:02 +00:00
|
|
|
{
|
2017-11-29 15:13:17 +00:00
|
|
|
struct spdk_bdev_alias *tmp;
|
2016-08-01 21:31:02 +00:00
|
|
|
struct spdk_bdev *bdev = spdk_bdev_first();
|
|
|
|
|
|
|
|
while (bdev != NULL) {
|
2017-06-02 17:25:43 +00:00
|
|
|
if (strcmp(bdev_name, bdev->name) == 0) {
|
2016-08-02 17:07:34 +00:00
|
|
|
return bdev;
|
2016-08-01 21:31:02 +00:00
|
|
|
}
|
2017-11-29 15:13:17 +00:00
|
|
|
|
|
|
|
TAILQ_FOREACH(tmp, &bdev->aliases, tailq) {
|
|
|
|
if (strcmp(bdev_name, tmp->alias) == 0) {
|
|
|
|
return bdev;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-08-01 21:31:02 +00:00
|
|
|
bdev = spdk_bdev_next(bdev);
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2016-07-20 18:16:23 +00:00
|
|
|
static void
|
2017-05-05 20:15:51 +00:00
|
|
|
spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2017-05-05 20:15:51 +00:00
|
|
|
assert(bdev_io->get_buf_cb != NULL);
|
2016-08-18 16:45:50 +00:00
|
|
|
assert(buf != NULL);
|
2017-09-20 13:10:17 +00:00
|
|
|
assert(bdev_io->u.bdev.iovs != NULL);
|
2016-10-04 14:39:27 +00:00
|
|
|
|
2017-05-09 20:32:20 +00:00
|
|
|
bdev_io->buf = buf;
|
2017-09-20 13:10:17 +00:00
|
|
|
bdev_io->u.bdev.iovs[0].iov_base = (void *)((unsigned long)((char *)buf + 512) & ~511UL);
|
2017-09-22 20:59:55 +00:00
|
|
|
bdev_io->u.bdev.iovs[0].iov_len = bdev_io->buf_len;
|
2017-05-05 20:15:51 +00:00
|
|
|
bdev_io->get_buf_cb(bdev_io->ch->channel, bdev_io);
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2017-05-05 20:15:51 +00:00
|
|
|
spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2017-04-11 04:36:05 +00:00
|
|
|
struct spdk_mempool *pool;
|
2016-09-19 06:06:40 +00:00
|
|
|
struct spdk_bdev_io *tmp;
|
2016-07-20 18:16:23 +00:00
|
|
|
void *buf;
|
2018-01-05 21:55:38 +00:00
|
|
|
bdev_io_stailq_t *stailq;
|
2017-05-10 21:42:45 +00:00
|
|
|
struct spdk_bdev_mgmt_channel *ch;
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-09-20 13:10:17 +00:00
|
|
|
assert(bdev_io->u.bdev.iovcnt == 1);
|
2016-10-04 14:39:27 +00:00
|
|
|
|
2017-05-09 20:32:20 +00:00
|
|
|
buf = bdev_io->buf;
|
2018-01-11 15:07:27 +00:00
|
|
|
ch = bdev_io->mgmt_ch;
|
2017-05-10 21:42:45 +00:00
|
|
|
|
2017-09-22 20:59:55 +00:00
|
|
|
if (bdev_io->buf_len <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) {
|
2017-05-09 21:09:28 +00:00
|
|
|
pool = g_bdev_mgr.buf_small_pool;
|
2018-01-05 21:55:38 +00:00
|
|
|
stailq = &ch->need_buf_small;
|
2016-07-20 18:16:23 +00:00
|
|
|
} else {
|
2017-05-09 21:09:28 +00:00
|
|
|
pool = g_bdev_mgr.buf_large_pool;
|
2018-01-05 21:55:38 +00:00
|
|
|
stailq = &ch->need_buf_large;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2018-01-05 21:55:38 +00:00
|
|
|
if (STAILQ_EMPTY(stailq)) {
|
2017-04-11 04:36:05 +00:00
|
|
|
spdk_mempool_put(pool, buf);
|
2016-07-20 18:16:23 +00:00
|
|
|
} else {
|
2018-01-05 21:55:38 +00:00
|
|
|
tmp = STAILQ_FIRST(stailq);
|
|
|
|
STAILQ_REMOVE_HEAD(stailq, buf_link);
|
2017-05-05 20:15:51 +00:00
|
|
|
spdk_bdev_io_set_buf(tmp, buf);
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-09 21:32:49 +00:00
|
|
|
void
|
2017-09-22 20:59:55 +00:00
|
|
|
spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, uint64_t len)
|
2017-05-09 21:32:49 +00:00
|
|
|
{
|
|
|
|
struct spdk_mempool *pool;
|
2018-01-05 21:55:38 +00:00
|
|
|
bdev_io_stailq_t *stailq;
|
2017-05-09 21:32:49 +00:00
|
|
|
void *buf = NULL;
|
2017-05-10 21:42:45 +00:00
|
|
|
struct spdk_bdev_mgmt_channel *ch;
|
2017-05-09 21:32:49 +00:00
|
|
|
|
|
|
|
assert(cb != NULL);
|
2017-09-20 13:10:17 +00:00
|
|
|
assert(bdev_io->u.bdev.iovs != NULL);
|
2017-05-09 21:32:49 +00:00
|
|
|
|
2017-09-20 13:10:17 +00:00
|
|
|
if (spdk_unlikely(bdev_io->u.bdev.iovs[0].iov_base != NULL)) {
|
2017-05-09 21:32:49 +00:00
|
|
|
/* Buffer already present */
|
|
|
|
cb(bdev_io->ch->channel, bdev_io);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2017-09-27 18:59:50 +00:00
|
|
|
assert(len <= SPDK_BDEV_LARGE_BUF_MAX_SIZE);
|
2017-05-10 21:42:45 +00:00
|
|
|
ch = spdk_io_channel_get_ctx(bdev_io->ch->mgmt_channel);
|
|
|
|
|
2017-09-22 20:59:55 +00:00
|
|
|
bdev_io->buf_len = len;
|
2017-05-09 21:32:49 +00:00
|
|
|
bdev_io->get_buf_cb = cb;
|
|
|
|
if (len <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) {
|
|
|
|
pool = g_bdev_mgr.buf_small_pool;
|
2018-01-05 21:55:38 +00:00
|
|
|
stailq = &ch->need_buf_small;
|
2017-05-09 21:32:49 +00:00
|
|
|
} else {
|
|
|
|
pool = g_bdev_mgr.buf_large_pool;
|
2018-01-05 21:55:38 +00:00
|
|
|
stailq = &ch->need_buf_large;
|
2017-05-09 21:32:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
buf = spdk_mempool_get(pool);
|
|
|
|
|
|
|
|
if (!buf) {
|
2018-01-05 21:55:38 +00:00
|
|
|
STAILQ_INSERT_TAIL(stailq, bdev_io, buf_link);
|
2017-05-09 21:32:49 +00:00
|
|
|
} else {
|
|
|
|
spdk_bdev_io_set_buf(bdev_io, buf);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-07-20 18:16:23 +00:00
|
|
|
static int
|
|
|
|
spdk_bdev_module_get_max_ctx_size(void)
|
|
|
|
{
|
2018-03-09 22:20:21 +00:00
|
|
|
struct spdk_bdev_module *bdev_module;
|
2016-07-20 18:16:23 +00:00
|
|
|
int max_bdev_module_size = 0;
|
|
|
|
|
2017-05-09 21:09:28 +00:00
|
|
|
TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) {
|
2016-07-20 18:16:23 +00:00
|
|
|
if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) {
|
|
|
|
max_bdev_module_size = bdev_module->get_ctx_size();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return max_bdev_module_size;
|
|
|
|
}
|
|
|
|
|
2017-06-06 20:26:04 +00:00
|
|
|
void
|
2016-07-20 18:16:23 +00:00
|
|
|
spdk_bdev_config_text(FILE *fp)
|
|
|
|
{
|
2018-03-09 22:20:21 +00:00
|
|
|
struct spdk_bdev_module *bdev_module;
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-05-09 21:09:28 +00:00
|
|
|
TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) {
|
2016-07-20 18:16:23 +00:00
|
|
|
if (bdev_module->config_text) {
|
|
|
|
bdev_module->config_text(fp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-03-23 13:55:07 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_subsystem_config_json(struct spdk_json_write_ctx *w)
|
2018-02-22 12:48:13 +00:00
|
|
|
{
|
|
|
|
struct spdk_bdev_module *bdev_module;
|
|
|
|
struct spdk_bdev *bdev;
|
|
|
|
|
2018-03-23 13:55:07 +00:00
|
|
|
assert(w != NULL);
|
2018-02-22 12:48:13 +00:00
|
|
|
|
|
|
|
spdk_json_write_array_begin(w);
|
|
|
|
|
|
|
|
TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) {
|
|
|
|
if (bdev_module->config_json) {
|
|
|
|
bdev_module->config_json(w);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
TAILQ_FOREACH(bdev, &g_bdev_mgr.bdevs, link) {
|
2018-03-23 13:55:07 +00:00
|
|
|
spdk_bdev_config_json(bdev, w);
|
2018-02-22 12:48:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
spdk_json_write_array_end(w);
|
|
|
|
}
|
|
|
|
|
2017-05-09 22:07:56 +00:00
|
|
|
static int
|
|
|
|
spdk_bdev_mgmt_channel_create(void *io_device, void *ctx_buf)
|
|
|
|
{
|
2017-05-10 21:42:45 +00:00
|
|
|
struct spdk_bdev_mgmt_channel *ch = ctx_buf;
|
|
|
|
|
2018-01-05 21:55:38 +00:00
|
|
|
STAILQ_INIT(&ch->need_buf_small);
|
|
|
|
STAILQ_INIT(&ch->need_buf_large);
|
2017-05-10 21:42:45 +00:00
|
|
|
|
2018-01-05 21:55:38 +00:00
|
|
|
STAILQ_INIT(&ch->per_thread_cache);
|
2017-12-20 15:20:23 +00:00
|
|
|
ch->per_thread_cache_count = 0;
|
|
|
|
|
2018-01-14 11:48:01 +00:00
|
|
|
TAILQ_INIT(&ch->module_channels);
|
|
|
|
|
2017-05-09 22:07:56 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-12-20 16:04:05 +00:00
|
|
|
static void
|
|
|
|
spdk_bdev_mgmt_channel_free_resources(struct spdk_bdev_mgmt_channel *ch)
|
|
|
|
{
|
2017-12-20 15:20:23 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
|
|
|
|
2018-01-05 21:55:38 +00:00
|
|
|
if (!STAILQ_EMPTY(&ch->need_buf_small) || !STAILQ_EMPTY(&ch->need_buf_large)) {
|
2017-12-20 16:04:05 +00:00
|
|
|
SPDK_ERRLOG("Pending I/O list wasn't empty on channel free\n");
|
|
|
|
}
|
2017-12-20 15:20:23 +00:00
|
|
|
|
2018-01-05 21:55:38 +00:00
|
|
|
while (!STAILQ_EMPTY(&ch->per_thread_cache)) {
|
|
|
|
bdev_io = STAILQ_FIRST(&ch->per_thread_cache);
|
|
|
|
STAILQ_REMOVE_HEAD(&ch->per_thread_cache, buf_link);
|
2017-12-20 15:20:23 +00:00
|
|
|
ch->per_thread_cache_count--;
|
|
|
|
spdk_mempool_put(g_bdev_mgr.bdev_io_pool, (void *)bdev_io);
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(ch->per_thread_cache_count == 0);
|
2017-12-20 16:04:05 +00:00
|
|
|
}
|
|
|
|
|
2017-05-09 22:07:56 +00:00
|
|
|
/*
 * io_device destroy callback for the bdev manager: release whatever the
 * management channel still holds.
 *
 * \param io_device unused.
 * \param ctx_buf the struct spdk_bdev_mgmt_channel being destroyed.
 */
static void
spdk_bdev_mgmt_channel_destroy(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_mgmt_channel *mgmt_ch;

	mgmt_ch = ctx_buf;
	spdk_bdev_mgmt_channel_free_resources(mgmt_ch);
}
|
|
|
|
|
2017-06-14 23:37:15 +00:00
|
|
|
static void
|
|
|
|
spdk_bdev_init_complete(int rc)
|
|
|
|
{
|
2017-10-25 13:58:02 +00:00
|
|
|
spdk_bdev_init_cb cb_fn = g_init_cb_fn;
|
|
|
|
void *cb_arg = g_init_cb_arg;
|
2017-06-14 23:37:15 +00:00
|
|
|
|
2017-07-10 23:36:35 +00:00
|
|
|
g_bdev_mgr.init_complete = true;
|
2017-10-25 13:58:02 +00:00
|
|
|
g_init_cb_fn = NULL;
|
|
|
|
g_init_cb_arg = NULL;
|
2017-06-14 23:37:15 +00:00
|
|
|
|
|
|
|
cb_fn(cb_arg, rc);
|
|
|
|
}
|
|
|
|
|
2017-07-10 23:36:35 +00:00
|
|
|
static void
|
2017-08-25 07:30:10 +00:00
|
|
|
spdk_bdev_module_action_complete(void)
|
2017-07-10 23:36:35 +00:00
|
|
|
{
|
2018-03-09 22:20:21 +00:00
|
|
|
struct spdk_bdev_module *m;
|
2017-07-10 23:36:35 +00:00
|
|
|
|
|
|
|
/*
|
2017-08-24 15:36:25 +00:00
|
|
|
* Don't finish bdev subsystem initialization if
|
|
|
|
* module pre-initialization is still in progress, or
|
|
|
|
* the subsystem been already initialized.
|
|
|
|
*/
|
|
|
|
if (!g_bdev_mgr.module_init_complete || g_bdev_mgr.init_complete) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check all bdev modules for inits/examinations in progress. If any
|
2017-07-10 23:36:35 +00:00
|
|
|
* exist, return immediately since we cannot finish bdev subsystem
|
|
|
|
* initialization until all are completed.
|
|
|
|
*/
|
2017-07-13 04:06:22 +00:00
|
|
|
TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, tailq) {
|
2017-08-24 15:36:25 +00:00
|
|
|
if (m->action_in_progress > 0) {
|
2017-07-10 23:36:35 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-08-25 07:30:10 +00:00
|
|
|
/*
|
|
|
|
* Modules already finished initialization - now that all
|
2017-08-24 15:36:25 +00:00
|
|
|
* the bdev modules have finished their asynchronous I/O
|
2017-08-25 07:30:10 +00:00
|
|
|
* processing, the entire bdev layer can be marked as complete.
|
|
|
|
*/
|
2017-07-13 17:36:19 +00:00
|
|
|
spdk_bdev_init_complete(0);
|
2017-07-10 23:36:35 +00:00
|
|
|
}
|
|
|
|
|
2017-08-24 15:36:25 +00:00
|
|
|
static void
|
2018-03-09 22:20:21 +00:00
|
|
|
spdk_bdev_module_action_done(struct spdk_bdev_module *module)
|
2017-08-24 15:36:25 +00:00
|
|
|
{
|
|
|
|
assert(module->action_in_progress > 0);
|
|
|
|
module->action_in_progress--;
|
|
|
|
spdk_bdev_module_action_complete();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Called by a bdev module to report that its initialization action is
 * finished. Forwards to spdk_bdev_module_action_done().
 */
void
spdk_bdev_module_init_done(struct spdk_bdev_module *module)
{
	/* Init and examine completions share the same bookkeeping. */
	spdk_bdev_module_action_done(module);
}
|
|
|
|
|
|
|
|
/*
 * Called by a bdev module to report that an examine action is finished.
 * Forwards to spdk_bdev_module_action_done().
 */
void
spdk_bdev_module_examine_done(struct spdk_bdev_module *module)
{
	/* Init and examine completions share the same bookkeeping. */
	spdk_bdev_module_action_done(module);
}
|
|
|
|
|
2017-07-13 17:36:19 +00:00
|
|
|
static int
|
|
|
|
spdk_bdev_modules_init(void)
|
2017-05-26 04:58:04 +00:00
|
|
|
{
|
2018-03-09 22:20:21 +00:00
|
|
|
struct spdk_bdev_module *module;
|
2017-08-25 07:30:10 +00:00
|
|
|
int rc = 0;
|
2017-05-26 04:58:04 +00:00
|
|
|
|
2017-07-13 17:36:19 +00:00
|
|
|
TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, tailq) {
|
|
|
|
rc = module->module_init();
|
|
|
|
if (rc != 0) {
|
2017-08-25 07:30:10 +00:00
|
|
|
break;
|
2017-07-13 17:36:19 +00:00
|
|
|
}
|
2017-05-26 04:58:04 +00:00
|
|
|
}
|
|
|
|
|
2017-08-25 07:30:10 +00:00
|
|
|
g_bdev_mgr.module_init_complete = true;
|
|
|
|
return rc;
|
2017-05-26 04:58:04 +00:00
|
|
|
}
|
2017-06-15 19:01:53 +00:00
|
|
|
void
|
2017-11-17 21:49:36 +00:00
|
|
|
spdk_bdev_initialize(spdk_bdev_init_cb cb_fn, void *cb_arg)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2017-05-10 21:42:45 +00:00
|
|
|
int cache_size;
|
2017-05-09 21:27:36 +00:00
|
|
|
int rc = 0;
|
2017-08-17 02:01:54 +00:00
|
|
|
char mempool_name[32];
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-06-14 23:37:15 +00:00
|
|
|
assert(cb_fn != NULL);
|
|
|
|
|
2017-10-25 13:58:02 +00:00
|
|
|
g_init_cb_fn = cb_fn;
|
|
|
|
g_init_cb_arg = cb_arg;
|
2017-06-14 23:37:15 +00:00
|
|
|
|
2017-08-17 02:01:54 +00:00
|
|
|
snprintf(mempool_name, sizeof(mempool_name), "bdev_io_%d", getpid());
|
|
|
|
|
|
|
|
g_bdev_mgr.bdev_io_pool = spdk_mempool_create(mempool_name,
|
2017-05-09 21:09:28 +00:00
|
|
|
SPDK_BDEV_IO_POOL_SIZE,
|
|
|
|
sizeof(struct spdk_bdev_io) +
|
|
|
|
spdk_bdev_module_get_max_ctx_size(),
|
2017-12-20 15:20:23 +00:00
|
|
|
0,
|
2017-05-09 21:09:28 +00:00
|
|
|
SPDK_ENV_SOCKET_ID_ANY);
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-05-09 21:09:28 +00:00
|
|
|
if (g_bdev_mgr.bdev_io_pool == NULL) {
|
2017-08-18 13:22:25 +00:00
|
|
|
SPDK_ERRLOG("could not allocate spdk_bdev_io pool\n");
|
2017-08-24 11:42:06 +00:00
|
|
|
spdk_bdev_init_complete(-1);
|
2017-07-13 17:36:19 +00:00
|
|
|
return;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-05-09 21:27:36 +00:00
|
|
|
/**
|
|
|
|
* Ensure no more than half of the total buffers end up local caches, by
|
|
|
|
* using spdk_env_get_core_count() to determine how many local caches we need
|
|
|
|
* to account for.
|
|
|
|
*/
|
|
|
|
cache_size = BUF_SMALL_POOL_SIZE / (2 * spdk_env_get_core_count());
|
2017-08-17 02:01:54 +00:00
|
|
|
snprintf(mempool_name, sizeof(mempool_name), "buf_small_pool_%d", getpid());
|
|
|
|
|
|
|
|
g_bdev_mgr.buf_small_pool = spdk_mempool_create(mempool_name,
|
2017-05-09 21:27:36 +00:00
|
|
|
BUF_SMALL_POOL_SIZE,
|
|
|
|
SPDK_BDEV_SMALL_BUF_MAX_SIZE + 512,
|
|
|
|
cache_size,
|
|
|
|
SPDK_ENV_SOCKET_ID_ANY);
|
|
|
|
if (!g_bdev_mgr.buf_small_pool) {
|
|
|
|
SPDK_ERRLOG("create rbuf small pool failed\n");
|
2017-08-24 11:42:06 +00:00
|
|
|
spdk_bdev_init_complete(-1);
|
2017-07-13 17:36:19 +00:00
|
|
|
return;
|
2017-05-09 21:27:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
cache_size = BUF_LARGE_POOL_SIZE / (2 * spdk_env_get_core_count());
|
2017-08-17 02:01:54 +00:00
|
|
|
snprintf(mempool_name, sizeof(mempool_name), "buf_large_pool_%d", getpid());
|
|
|
|
|
|
|
|
g_bdev_mgr.buf_large_pool = spdk_mempool_create(mempool_name,
|
2017-05-09 21:27:36 +00:00
|
|
|
BUF_LARGE_POOL_SIZE,
|
|
|
|
SPDK_BDEV_LARGE_BUF_MAX_SIZE + 512,
|
|
|
|
cache_size,
|
|
|
|
SPDK_ENV_SOCKET_ID_ANY);
|
|
|
|
if (!g_bdev_mgr.buf_large_pool) {
|
|
|
|
SPDK_ERRLOG("create rbuf large pool failed\n");
|
2017-08-24 11:42:06 +00:00
|
|
|
spdk_bdev_init_complete(-1);
|
2017-07-13 17:36:19 +00:00
|
|
|
return;
|
2017-05-09 21:27:36 +00:00
|
|
|
}
|
|
|
|
|
2017-07-28 22:34:24 +00:00
|
|
|
g_bdev_mgr.zero_buffer = spdk_dma_zmalloc(ZERO_BUFFER_SIZE, ZERO_BUFFER_SIZE,
|
|
|
|
NULL);
|
|
|
|
if (!g_bdev_mgr.zero_buffer) {
|
|
|
|
SPDK_ERRLOG("create bdev zero buffer failed\n");
|
|
|
|
spdk_bdev_init_complete(-1);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2017-05-30 08:45:46 +00:00
|
|
|
#ifdef SPDK_CONFIG_VTUNE
|
|
|
|
g_bdev_mgr.domain = __itt_domain_create("spdk_bdev");
|
|
|
|
#endif
|
|
|
|
|
2017-05-09 22:07:56 +00:00
|
|
|
spdk_io_device_register(&g_bdev_mgr, spdk_bdev_mgmt_channel_create,
|
|
|
|
spdk_bdev_mgmt_channel_destroy,
|
|
|
|
sizeof(struct spdk_bdev_mgmt_channel));
|
|
|
|
|
2017-07-13 17:36:19 +00:00
|
|
|
rc = spdk_bdev_modules_init();
|
2017-08-25 07:30:10 +00:00
|
|
|
if (rc != 0) {
|
|
|
|
SPDK_ERRLOG("bdev modules init failed\n");
|
|
|
|
spdk_bdev_init_complete(-1);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
spdk_bdev_module_action_complete();
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-10-25 13:58:02 +00:00
|
|
|
static void
|
|
|
|
spdk_bdev_module_finish_cb(void *io_device)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2017-10-25 13:58:02 +00:00
|
|
|
spdk_bdev_fini_cb cb_fn = g_fini_cb_fn;
|
2017-05-09 21:50:43 +00:00
|
|
|
|
2017-10-25 13:58:02 +00:00
|
|
|
cb_fn(g_fini_cb_arg);
|
|
|
|
g_fini_cb_fn = NULL;
|
|
|
|
g_fini_cb_arg = NULL;
|
|
|
|
}
|
2017-05-09 21:50:43 +00:00
|
|
|
|
2017-10-25 13:58:02 +00:00
|
|
|
static void
|
2017-12-20 16:04:05 +00:00
|
|
|
spdk_bdev_module_finish_complete(struct spdk_io_channel_iter *i, int status)
|
2017-10-25 13:58:02 +00:00
|
|
|
{
|
2017-05-09 21:50:43 +00:00
|
|
|
if (spdk_mempool_count(g_bdev_mgr.bdev_io_pool) != SPDK_BDEV_IO_POOL_SIZE) {
|
|
|
|
SPDK_ERRLOG("bdev IO pool count is %zu but should be %u\n",
|
|
|
|
spdk_mempool_count(g_bdev_mgr.bdev_io_pool),
|
|
|
|
SPDK_BDEV_IO_POOL_SIZE);
|
|
|
|
}
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-05-09 21:50:43 +00:00
|
|
|
if (spdk_mempool_count(g_bdev_mgr.buf_small_pool) != BUF_SMALL_POOL_SIZE) {
|
|
|
|
SPDK_ERRLOG("Small buffer pool count is %zu but should be %u\n",
|
|
|
|
spdk_mempool_count(g_bdev_mgr.buf_small_pool),
|
|
|
|
BUF_SMALL_POOL_SIZE);
|
|
|
|
assert(false);
|
|
|
|
}
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-05-09 21:50:43 +00:00
|
|
|
if (spdk_mempool_count(g_bdev_mgr.buf_large_pool) != BUF_LARGE_POOL_SIZE) {
|
|
|
|
SPDK_ERRLOG("Large buffer pool count is %zu but should be %u\n",
|
|
|
|
spdk_mempool_count(g_bdev_mgr.buf_large_pool),
|
|
|
|
BUF_LARGE_POOL_SIZE);
|
|
|
|
assert(false);
|
|
|
|
}
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-05-09 21:50:43 +00:00
|
|
|
spdk_mempool_free(g_bdev_mgr.bdev_io_pool);
|
|
|
|
spdk_mempool_free(g_bdev_mgr.buf_small_pool);
|
|
|
|
spdk_mempool_free(g_bdev_mgr.buf_large_pool);
|
2017-07-28 22:34:24 +00:00
|
|
|
spdk_dma_free(g_bdev_mgr.zero_buffer);
|
2017-05-09 21:50:43 +00:00
|
|
|
|
2017-10-25 13:58:02 +00:00
|
|
|
spdk_io_device_unregister(&g_bdev_mgr, spdk_bdev_module_finish_cb);
|
|
|
|
}
|
|
|
|
|
2017-12-20 16:04:05 +00:00
|
|
|
/*
 * Per-channel step of spdk_for_each_channel(): release the resources held by
 * the management channel the iterator currently points at, then advance the
 * iteration with a status of 0.
 */
static void
mgmt_channel_free_resources(struct spdk_io_channel_iter *i)
{
	struct spdk_bdev_mgmt_channel *mgmt_ch;

	mgmt_ch = spdk_io_channel_get_ctx(spdk_io_channel_iter_get_channel(i));
	spdk_bdev_mgmt_channel_free_resources(mgmt_ch);

	spdk_for_each_channel_continue(i, 0);
}
|
|
|
|
|
2017-10-25 13:58:02 +00:00
|
|
|
static void
|
2017-11-10 18:16:51 +00:00
|
|
|
spdk_bdev_module_finish_iter(void *arg)
|
2017-10-25 13:58:02 +00:00
|
|
|
{
|
2017-11-10 18:16:51 +00:00
|
|
|
/* Notice that this variable is static. It is saved between calls to
|
|
|
|
* this function. */
|
2018-03-09 22:20:21 +00:00
|
|
|
static struct spdk_bdev_module *resume_bdev_module = NULL;
|
|
|
|
struct spdk_bdev_module *bdev_module;
|
2017-11-10 18:16:51 +00:00
|
|
|
|
|
|
|
/* Start iterating from the last touched module */
|
|
|
|
if (!resume_bdev_module) {
|
|
|
|
bdev_module = TAILQ_FIRST(&g_bdev_mgr.bdev_modules);
|
|
|
|
} else {
|
|
|
|
bdev_module = TAILQ_NEXT(resume_bdev_module, tailq);
|
|
|
|
}
|
|
|
|
|
|
|
|
while (bdev_module) {
|
|
|
|
if (bdev_module->async_fini) {
|
|
|
|
/* Save our place so we can resume later. We must
|
|
|
|
* save the variable here, before calling module_fini()
|
|
|
|
* below, because in some cases the module may immediately
|
|
|
|
* call spdk_bdev_module_finish_done() and re-enter
|
|
|
|
* this function to continue iterating. */
|
|
|
|
resume_bdev_module = bdev_module;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bdev_module->module_fini) {
|
|
|
|
bdev_module->module_fini();
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bdev_module->async_fini) {
|
|
|
|
return;
|
|
|
|
}
|
2017-10-25 13:58:02 +00:00
|
|
|
|
2017-11-10 18:16:51 +00:00
|
|
|
bdev_module = TAILQ_NEXT(bdev_module, tailq);
|
|
|
|
}
|
|
|
|
|
|
|
|
resume_bdev_module = NULL;
|
2017-12-20 16:04:05 +00:00
|
|
|
spdk_for_each_channel(&g_bdev_mgr, mgmt_channel_free_resources, NULL,
|
|
|
|
spdk_bdev_module_finish_complete);
|
2017-10-25 13:58:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
spdk_bdev_module_finish_done(void)
|
|
|
|
{
|
|
|
|
if (spdk_get_thread() != g_fini_thread) {
|
2017-11-10 18:16:51 +00:00
|
|
|
spdk_thread_send_msg(g_fini_thread, spdk_bdev_module_finish_iter, NULL);
|
2017-10-25 13:58:02 +00:00
|
|
|
} else {
|
2017-11-10 18:16:51 +00:00
|
|
|
spdk_bdev_module_finish_iter(NULL);
|
2017-10-25 13:58:02 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-11-21 17:45:05 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_finish_unregister_bdevs_iter(void *cb_arg, int bdeverrno)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev = cb_arg;
|
|
|
|
|
|
|
|
if (bdeverrno && bdev) {
|
|
|
|
SPDK_WARNLOG("Unable to unregister bdev '%s' during spdk_bdev_finish()\n",
|
|
|
|
bdev->name);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Since the call to spdk_bdev_unregister() failed, we have no way to free this
|
|
|
|
* bdev; try to continue by manually removing this bdev from the list and continue
|
|
|
|
* with the next bdev in the list.
|
|
|
|
*/
|
|
|
|
TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, link);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (TAILQ_EMPTY(&g_bdev_mgr.bdevs)) {
|
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Done unregistering bdevs\n");
|
2018-03-26 19:19:04 +00:00
|
|
|
/*
|
|
|
|
* Bdev module finish need to be deffered as we might be in the middle of some context
|
|
|
|
* (like bdev part free) that will use this bdev (or private bdev driver ctx data)
|
|
|
|
* after returning.
|
|
|
|
*/
|
|
|
|
spdk_thread_send_msg(spdk_get_thread(), spdk_bdev_module_finish_iter, NULL);
|
2017-11-21 17:45:05 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Unregister the first bdev in the list.
|
|
|
|
*
|
|
|
|
* spdk_bdev_unregister() will handle the case where the bdev has open descriptors by
|
|
|
|
* calling the remove_cb of the descriptors first.
|
|
|
|
*
|
|
|
|
* Once this bdev and all of its open descriptors have been cleaned up, this function
|
|
|
|
* will be called again via the unregister completion callback to continue the cleanup
|
|
|
|
* process with the next bdev.
|
|
|
|
*/
|
|
|
|
bdev = TAILQ_FIRST(&g_bdev_mgr.bdevs);
|
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Unregistering bdev '%s'\n", bdev->name);
|
|
|
|
spdk_bdev_unregister(bdev, _spdk_bdev_finish_unregister_bdevs_iter, bdev);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Start unregistering every bdev: enter the iterator with no previously
 * completed bdev and no error.
 */
static void
_spdk_bdev_finish_unregister_bdevs(void)
{
	void *no_prev_bdev = NULL;

	_spdk_bdev_finish_unregister_bdevs_iter(no_prev_bdev, 0);
}
|
|
|
|
|
2017-10-25 13:58:02 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_finish(spdk_bdev_fini_cb cb_fn, void *cb_arg)
|
|
|
|
{
|
|
|
|
assert(cb_fn != NULL);
|
|
|
|
|
|
|
|
g_fini_thread = spdk_get_thread();
|
|
|
|
|
|
|
|
g_fini_cb_fn = cb_fn;
|
|
|
|
g_fini_cb_arg = cb_arg;
|
|
|
|
|
2017-11-21 17:45:05 +00:00
|
|
|
_spdk_bdev_finish_unregister_bdevs();
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-12-20 15:29:52 +00:00
|
|
|
static struct spdk_bdev_io *
|
|
|
|
spdk_bdev_get_io(struct spdk_io_channel *_ch)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2017-12-20 15:20:23 +00:00
|
|
|
struct spdk_bdev_mgmt_channel *ch = spdk_io_channel_get_ctx(_ch);
|
2016-07-20 18:16:23 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
|
|
|
|
2017-12-20 15:20:23 +00:00
|
|
|
if (ch->per_thread_cache_count > 0) {
|
2018-01-05 21:55:38 +00:00
|
|
|
bdev_io = STAILQ_FIRST(&ch->per_thread_cache);
|
|
|
|
STAILQ_REMOVE_HEAD(&ch->per_thread_cache, buf_link);
|
2017-12-20 15:20:23 +00:00
|
|
|
ch->per_thread_cache_count--;
|
|
|
|
} else {
|
|
|
|
bdev_io = spdk_mempool_get(g_bdev_mgr.bdev_io_pool);
|
|
|
|
if (!bdev_io) {
|
|
|
|
SPDK_ERRLOG("Unable to get spdk_bdev_io\n");
|
2018-03-12 20:26:35 +00:00
|
|
|
return NULL;
|
2017-12-20 15:20:23 +00:00
|
|
|
}
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2018-01-11 15:07:27 +00:00
|
|
|
bdev_io->mgmt_ch = ch;
|
|
|
|
|
2016-07-20 18:16:23 +00:00
|
|
|
return bdev_io;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
spdk_bdev_put_io(struct spdk_bdev_io *bdev_io)
|
|
|
|
{
|
2018-01-11 15:07:27 +00:00
|
|
|
struct spdk_bdev_mgmt_channel *ch = bdev_io->mgmt_ch;
|
2017-12-20 15:20:23 +00:00
|
|
|
|
2017-05-09 20:32:20 +00:00
|
|
|
if (bdev_io->buf != NULL) {
|
2017-05-05 20:15:51 +00:00
|
|
|
spdk_bdev_io_put_buf(bdev_io);
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-12-20 15:20:23 +00:00
|
|
|
if (ch->per_thread_cache_count < SPDK_BDEV_IO_CACHE_SIZE) {
|
|
|
|
ch->per_thread_cache_count++;
|
2018-01-05 21:55:38 +00:00
|
|
|
STAILQ_INSERT_TAIL(&ch->per_thread_cache, bdev_io, buf_link);
|
2017-12-20 15:20:23 +00:00
|
|
|
} else {
|
|
|
|
spdk_mempool_put(g_bdev_mgr.bdev_io_pool, (void *)bdev_io);
|
|
|
|
}
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-09-07 21:51:14 +00:00
|
|
|
static void
|
2017-12-28 03:11:55 +00:00
|
|
|
_spdk_bdev_qos_io_submit(void *ctx)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_channel *ch = ctx;
|
|
|
|
struct spdk_bdev_io *bdev_io = NULL;
|
|
|
|
struct spdk_bdev *bdev = ch->bdev;
|
2018-04-05 22:33:48 +00:00
|
|
|
struct spdk_bdev_module_channel *module_ch = ch->module_ch;
|
2017-12-28 03:11:55 +00:00
|
|
|
|
|
|
|
while (!TAILQ_EMPTY(&ch->qos_io)) {
|
|
|
|
if (ch->io_submitted_this_timeslice < ch->qos_max_ios_per_timeslice) {
|
|
|
|
bdev_io = TAILQ_FIRST(&ch->qos_io);
|
|
|
|
TAILQ_REMOVE(&ch->qos_io, bdev_io, link);
|
|
|
|
ch->io_submitted_this_timeslice++;
|
2018-03-30 02:55:05 +00:00
|
|
|
ch->io_outstanding++;
|
2018-04-05 22:33:48 +00:00
|
|
|
module_ch->io_outstanding++;
|
2017-12-28 03:11:55 +00:00
|
|
|
bdev->fn_table->submit_request(ch->channel, bdev_io);
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_spdk_bdev_io_submit(void *ctx)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2017-12-28 03:11:55 +00:00
|
|
|
struct spdk_bdev_io *bdev_io = ctx;
|
2017-09-07 21:42:50 +00:00
|
|
|
struct spdk_bdev *bdev = bdev_io->bdev;
|
2017-09-14 21:02:09 +00:00
|
|
|
struct spdk_bdev_channel *bdev_ch = bdev_io->ch;
|
|
|
|
struct spdk_io_channel *ch = bdev_ch->channel;
|
2018-04-05 22:33:48 +00:00
|
|
|
struct spdk_bdev_module_channel *module_ch = bdev_ch->module_ch;
|
2017-09-14 21:02:09 +00:00
|
|
|
|
2017-12-06 19:02:51 +00:00
|
|
|
bdev_io->submit_tsc = spdk_get_ticks();
|
2018-03-16 12:20:55 +00:00
|
|
|
bdev_ch->io_outstanding++;
|
2018-04-05 22:33:48 +00:00
|
|
|
module_ch->io_outstanding++;
|
2017-09-14 21:02:09 +00:00
|
|
|
bdev_io->in_submit_request = true;
|
2017-09-08 18:44:50 +00:00
|
|
|
if (spdk_likely(bdev_ch->flags == 0)) {
|
2018-04-05 22:33:48 +00:00
|
|
|
if (spdk_likely(TAILQ_EMPTY(&module_ch->nomem_io))) {
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O oustanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
bdev->fn_table->submit_request(ch, bdev_io);
|
|
|
|
} else {
|
2018-03-16 12:20:55 +00:00
|
|
|
bdev_ch->io_outstanding--;
|
2018-04-05 22:33:48 +00:00
|
|
|
module_ch->io_outstanding--;
|
|
|
|
TAILQ_INSERT_TAIL(&module_ch->nomem_io, bdev_io, link);
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O oustanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
}
|
2017-09-08 18:44:50 +00:00
|
|
|
} else if (bdev_ch->flags & BDEV_CH_RESET_IN_PROGRESS) {
|
|
|
|
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
|
2017-12-28 03:11:55 +00:00
|
|
|
} else if (bdev_ch->flags & BDEV_CH_QOS_ENABLED) {
|
2018-03-30 02:55:05 +00:00
|
|
|
bdev_ch->io_outstanding--;
|
2018-04-05 22:33:48 +00:00
|
|
|
module_ch->io_outstanding--;
|
2017-12-28 03:11:55 +00:00
|
|
|
TAILQ_INSERT_TAIL(&bdev_ch->qos_io, bdev_io, link);
|
|
|
|
_spdk_bdev_qos_io_submit(bdev_ch);
|
2017-09-08 18:44:50 +00:00
|
|
|
} else {
|
|
|
|
SPDK_ERRLOG("unknown bdev_ch flag %x found\n", bdev_ch->flags);
|
|
|
|
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
|
|
|
|
}
|
2017-09-14 21:02:09 +00:00
|
|
|
bdev_io->in_submit_request = false;
|
|
|
|
}
|
|
|
|
|
2017-12-28 03:11:55 +00:00
|
|
|
static void
|
|
|
|
spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev = bdev_io->bdev;
|
|
|
|
|
|
|
|
assert(bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING);
|
|
|
|
|
|
|
|
/* QoS channel and thread have been properly configured */
|
|
|
|
if (bdev->ios_per_sec > 0 && bdev->qos_channel && bdev->qos_thread) {
|
|
|
|
bdev_io->io_submit_ch = bdev_io->ch;
|
|
|
|
bdev_io->ch = bdev->qos_channel;
|
|
|
|
spdk_thread_send_msg(bdev->qos_thread, _spdk_bdev_io_submit, bdev_io);
|
|
|
|
} else {
|
|
|
|
_spdk_bdev_io_submit(bdev_io);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-09-14 21:02:09 +00:00
|
|
|
static void
|
|
|
|
spdk_bdev_io_submit_reset(struct spdk_bdev_io *bdev_io)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev = bdev_io->bdev;
|
|
|
|
struct spdk_bdev_channel *bdev_ch = bdev_io->ch;
|
|
|
|
struct spdk_io_channel *ch = bdev_ch->channel;
|
2017-05-04 20:18:03 +00:00
|
|
|
|
2017-01-11 23:38:11 +00:00
|
|
|
assert(bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING);
|
2017-05-04 20:18:03 +00:00
|
|
|
|
2017-01-12 17:58:20 +00:00
|
|
|
bdev_io->in_submit_request = true;
|
2017-05-04 20:18:03 +00:00
|
|
|
bdev->fn_table->submit_request(ch, bdev_io);
|
2017-01-12 17:58:20 +00:00
|
|
|
bdev_io->in_submit_request = false;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
spdk_bdev_io_init(struct spdk_bdev_io *bdev_io,
|
|
|
|
struct spdk_bdev *bdev, void *cb_arg,
|
|
|
|
spdk_bdev_io_completion_cb cb)
|
|
|
|
{
|
|
|
|
bdev_io->bdev = bdev;
|
|
|
|
bdev_io->caller_ctx = cb_arg;
|
|
|
|
bdev_io->cb = cb;
|
|
|
|
bdev_io->status = SPDK_BDEV_IO_STATUS_PENDING;
|
2017-01-12 17:58:20 +00:00
|
|
|
bdev_io->in_submit_request = false;
|
2018-01-05 00:44:02 +00:00
|
|
|
bdev_io->buf = NULL;
|
2017-12-28 03:11:55 +00:00
|
|
|
bdev_io->io_submit_ch = NULL;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2016-08-24 17:25:49 +00:00
|
|
|
bool
|
|
|
|
spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type)
|
|
|
|
{
|
2017-04-04 21:10:00 +00:00
|
|
|
return bdev->fn_table->io_type_supported(bdev->ctxt, io_type);
|
2016-08-24 17:25:49 +00:00
|
|
|
}
|
|
|
|
|
2016-11-18 17:22:58 +00:00
|
|
|
int
|
2018-02-22 12:48:13 +00:00
|
|
|
spdk_bdev_dump_info_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
|
2016-11-18 17:22:58 +00:00
|
|
|
{
|
2018-02-22 12:48:13 +00:00
|
|
|
if (bdev->fn_table->dump_info_json) {
|
|
|
|
return bdev->fn_table->dump_info_json(bdev->ctxt, w);
|
2016-11-18 17:22:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-03-23 13:55:07 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
|
2018-02-22 12:48:13 +00:00
|
|
|
{
|
2018-03-29 17:38:32 +00:00
|
|
|
assert(bdev != NULL);
|
|
|
|
assert(w != NULL);
|
2018-02-22 12:48:13 +00:00
|
|
|
|
|
|
|
if (bdev->fn_table->write_config_json) {
|
|
|
|
bdev->fn_table->write_config_json(bdev, w);
|
|
|
|
} else {
|
|
|
|
spdk_json_write_object_begin(w);
|
|
|
|
spdk_json_write_named_string(w, "name", bdev->name);
|
|
|
|
spdk_json_write_object_end(w);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-12-28 03:11:55 +00:00
|
|
|
static void
|
2018-03-27 05:50:44 +00:00
|
|
|
spdk_bdev_qos_get_max_ios_per_timeslice(struct spdk_bdev_channel *qos_ch)
|
2017-12-28 03:11:55 +00:00
|
|
|
{
|
2018-03-27 05:50:44 +00:00
|
|
|
uint64_t qos_max_ios_per_timeslice = 0;
|
|
|
|
struct spdk_bdev *bdev = qos_ch->bdev;
|
2017-12-28 03:11:55 +00:00
|
|
|
|
|
|
|
qos_max_ios_per_timeslice = bdev->ios_per_sec * SPDK_BDEV_QOS_TIMESLICE_IN_USEC /
|
|
|
|
SPDK_BDEV_SEC_TO_USEC;
|
2018-03-27 05:50:44 +00:00
|
|
|
qos_ch->qos_max_ios_per_timeslice = spdk_max(qos_max_ios_per_timeslice,
|
|
|
|
SPDK_BDEV_QOS_MIN_IO_PER_TIMESLICE);
|
2017-12-28 03:11:55 +00:00
|
|
|
}
|
|
|
|
|
2018-03-13 00:16:47 +00:00
|
|
|
static int
|
2017-12-28 03:11:55 +00:00
|
|
|
spdk_bdev_channel_poll_qos(void *arg)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_channel *ch = arg;
|
|
|
|
|
|
|
|
/* Reset for next round of rate limiting */
|
|
|
|
ch->io_submitted_this_timeslice = 0;
|
2018-03-27 05:50:44 +00:00
|
|
|
spdk_bdev_qos_get_max_ios_per_timeslice(ch);
|
2017-12-28 03:11:55 +00:00
|
|
|
|
|
|
|
_spdk_bdev_qos_io_submit(ch);
|
2018-03-13 00:16:47 +00:00
|
|
|
|
|
|
|
return -1;
|
2017-12-28 03:11:55 +00:00
|
|
|
}
|
|
|
|
|
2017-04-04 21:01:54 +00:00
|
|
|
static int
|
2018-01-21 23:03:27 +00:00
|
|
|
_spdk_bdev_channel_create(struct spdk_bdev_channel *ch, void *io_device)
|
2017-04-04 21:01:54 +00:00
|
|
|
{
|
2018-02-19 22:21:15 +00:00
|
|
|
struct spdk_bdev *bdev = __bdev_from_io_dev(io_device);
|
2018-01-14 11:48:01 +00:00
|
|
|
struct spdk_bdev_mgmt_channel *mgmt_ch;
|
2018-04-05 22:33:48 +00:00
|
|
|
struct spdk_bdev_module_channel *module_ch;
|
2017-04-04 21:01:54 +00:00
|
|
|
|
2018-02-19 22:21:15 +00:00
|
|
|
ch->bdev = bdev;
|
2017-05-18 17:48:04 +00:00
|
|
|
ch->channel = bdev->fn_table->get_io_channel(bdev->ctxt);
|
2017-10-02 12:45:06 +00:00
|
|
|
if (!ch->channel) {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2017-05-09 22:07:56 +00:00
|
|
|
ch->mgmt_channel = spdk_get_io_channel(&g_bdev_mgr);
|
2017-10-02 12:45:06 +00:00
|
|
|
if (!ch->mgmt_channel) {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2018-01-14 11:48:01 +00:00
|
|
|
mgmt_ch = spdk_io_channel_get_ctx(ch->mgmt_channel);
|
2018-04-05 22:33:48 +00:00
|
|
|
TAILQ_FOREACH(module_ch, &mgmt_ch->module_channels, link) {
|
|
|
|
if (module_ch->module_ch == ch->channel) {
|
|
|
|
module_ch->ref++;
|
2018-01-14 11:48:01 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-04-05 22:33:48 +00:00
|
|
|
if (module_ch == NULL) {
|
|
|
|
module_ch = calloc(1, sizeof(*module_ch));
|
|
|
|
if (!module_ch) {
|
2018-01-14 11:48:01 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2018-04-05 22:33:48 +00:00
|
|
|
module_ch->io_outstanding = 0;
|
|
|
|
TAILQ_INIT(&module_ch->nomem_io);
|
|
|
|
module_ch->nomem_threshold = 0;
|
|
|
|
module_ch->module_ch = ch->channel;
|
|
|
|
module_ch->ref = 1;
|
|
|
|
TAILQ_INSERT_TAIL(&mgmt_ch->module_channels, module_ch, link);
|
2018-01-14 11:48:01 +00:00
|
|
|
}
|
|
|
|
|
2017-04-06 21:40:29 +00:00
|
|
|
memset(&ch->stat, 0, sizeof(ch->stat));
|
2018-03-16 12:20:55 +00:00
|
|
|
ch->io_outstanding = 0;
|
2017-09-08 19:34:49 +00:00
|
|
|
TAILQ_INIT(&ch->queued_resets);
|
2017-12-28 02:18:12 +00:00
|
|
|
TAILQ_INIT(&ch->qos_io);
|
|
|
|
ch->qos_max_ios_per_timeslice = 0;
|
|
|
|
ch->io_submitted_this_timeslice = 0;
|
|
|
|
ch->qos_poller = NULL;
|
2017-09-08 18:44:50 +00:00
|
|
|
ch->flags = 0;
|
2018-04-05 22:33:48 +00:00
|
|
|
ch->module_ch = module_ch;
|
2017-04-04 21:01:54 +00:00
|
|
|
|
2018-01-21 23:03:27 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_spdk_bdev_channel_destroy_resource(struct spdk_bdev_channel *ch)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_mgmt_channel *mgmt_channel;
|
2018-04-05 22:33:48 +00:00
|
|
|
struct spdk_bdev_module_channel *module_ch = NULL;
|
2017-12-28 03:11:55 +00:00
|
|
|
|
|
|
|
if (!ch) {
|
|
|
|
return;
|
|
|
|
}
|
2018-01-21 23:03:27 +00:00
|
|
|
|
|
|
|
if (ch->channel) {
|
|
|
|
spdk_put_io_channel(ch->channel);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ch->mgmt_channel) {
|
2018-04-05 22:33:48 +00:00
|
|
|
module_ch = ch->module_ch;
|
|
|
|
if (module_ch) {
|
2018-03-16 12:20:55 +00:00
|
|
|
assert(ch->io_outstanding == 0);
|
2018-04-05 22:33:48 +00:00
|
|
|
assert(module_ch->ref > 0);
|
|
|
|
module_ch->ref--;
|
|
|
|
if (module_ch->ref == 0) {
|
2018-01-21 23:03:27 +00:00
|
|
|
mgmt_channel = spdk_io_channel_get_ctx(ch->mgmt_channel);
|
2018-04-05 22:33:48 +00:00
|
|
|
assert(module_ch->io_outstanding == 0);
|
|
|
|
TAILQ_REMOVE(&mgmt_channel->module_channels, module_ch, link);
|
|
|
|
free(module_ch);
|
2018-01-21 23:03:27 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
spdk_put_io_channel(ch->mgmt_channel);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-03-09 21:46:47 +00:00
|
|
|
/* Caller must hold bdev->mutex. */
|
2017-12-28 03:11:55 +00:00
|
|
|
static int
|
2018-03-09 21:46:47 +00:00
|
|
|
spdk_bdev_qos_channel_create(struct spdk_bdev *bdev)
|
2017-12-28 03:11:55 +00:00
|
|
|
{
|
2018-03-09 21:46:47 +00:00
|
|
|
assert(bdev->qos_channel == NULL);
|
|
|
|
assert(bdev->qos_thread == NULL);
|
|
|
|
|
2017-12-28 03:11:55 +00:00
|
|
|
bdev->qos_channel = calloc(1, sizeof(struct spdk_bdev_channel));
|
|
|
|
if (!bdev->qos_channel) {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
bdev->qos_thread = spdk_get_thread();
|
|
|
|
if (!bdev->qos_thread) {
|
2018-03-09 21:46:47 +00:00
|
|
|
free(bdev->qos_channel);
|
|
|
|
bdev->qos_channel = NULL;
|
2017-12-28 03:11:55 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (_spdk_bdev_channel_create(bdev->qos_channel, __bdev_to_io_dev(bdev)) != 0) {
|
2018-03-09 21:46:47 +00:00
|
|
|
free(bdev->qos_channel);
|
|
|
|
bdev->qos_channel = NULL;
|
|
|
|
bdev->qos_thread = NULL;
|
2017-12-28 03:11:55 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
bdev->qos_channel->flags |= BDEV_CH_QOS_ENABLED;
|
2018-03-27 05:50:44 +00:00
|
|
|
spdk_bdev_qos_get_max_ios_per_timeslice(bdev->qos_channel);
|
2018-03-09 20:24:02 +00:00
|
|
|
bdev->qos_channel->qos_poller = spdk_poller_register(
|
|
|
|
spdk_bdev_channel_poll_qos,
|
|
|
|
bdev->qos_channel,
|
|
|
|
SPDK_BDEV_QOS_TIMESLICE_IN_USEC);
|
2017-12-28 03:11:55 +00:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-01-21 23:03:27 +00:00
|
|
|
static int
|
|
|
|
spdk_bdev_channel_create(void *io_device, void *ctx_buf)
|
|
|
|
{
|
2017-12-28 03:11:55 +00:00
|
|
|
struct spdk_bdev *bdev = __bdev_from_io_dev(io_device);
|
2018-01-21 23:03:27 +00:00
|
|
|
struct spdk_bdev_channel *ch = ctx_buf;
|
|
|
|
|
|
|
|
if (_spdk_bdev_channel_create(ch, io_device) != 0) {
|
|
|
|
_spdk_bdev_channel_destroy_resource(ch);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2017-05-30 08:45:46 +00:00
|
|
|
#ifdef SPDK_CONFIG_VTUNE
|
|
|
|
{
|
|
|
|
char *name;
|
2017-06-15 16:59:02 +00:00
|
|
|
__itt_init_ittlib(NULL, 0);
|
2017-05-30 08:45:46 +00:00
|
|
|
name = spdk_sprintf_alloc("spdk_bdev_%s_%p", ch->bdev->name, ch);
|
|
|
|
if (!name) {
|
2018-02-04 20:39:55 +00:00
|
|
|
_spdk_bdev_channel_destroy_resource(ch);
|
2017-05-30 08:45:46 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
ch->handle = __itt_string_handle_create(name);
|
|
|
|
free(name);
|
|
|
|
ch->start_tsc = spdk_get_ticks();
|
|
|
|
ch->interval_tsc = spdk_get_ticks_hz() / 100;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2018-03-09 21:46:47 +00:00
|
|
|
pthread_mutex_lock(&bdev->mutex);
|
|
|
|
|
|
|
|
/* Rate limiting on this bdev enabled */
|
|
|
|
if (bdev->ios_per_sec > 0 && bdev->qos_channel == NULL) {
|
|
|
|
if (spdk_bdev_qos_channel_create(bdev) != 0) {
|
|
|
|
_spdk_bdev_channel_destroy_resource(ch);
|
|
|
|
pthread_mutex_unlock(&bdev->mutex);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bdev->channel_count++;
|
|
|
|
|
|
|
|
pthread_mutex_unlock(&bdev->mutex);
|
|
|
|
|
2017-04-04 21:01:54 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-09-12 19:14:48 +00:00
|
|
|
/*
|
|
|
|
* Abort I/O that are waiting on a data buffer. These types of I/O are
|
|
|
|
* linked using the spdk_bdev_io buf_link TAILQ_ENTRY.
|
|
|
|
*/
|
2017-04-04 21:01:54 +00:00
|
|
|
static void
|
2018-01-05 21:55:38 +00:00
|
|
|
_spdk_bdev_abort_buf_io(bdev_io_stailq_t *queue, struct spdk_bdev_channel *ch)
|
2017-04-04 21:01:54 +00:00
|
|
|
{
|
2018-01-05 21:55:38 +00:00
|
|
|
bdev_io_stailq_t tmp;
|
|
|
|
struct spdk_bdev_io *bdev_io;
|
|
|
|
|
|
|
|
STAILQ_INIT(&tmp);
|
2017-05-11 16:48:07 +00:00
|
|
|
|
2018-01-05 21:55:38 +00:00
|
|
|
while (!STAILQ_EMPTY(queue)) {
|
|
|
|
bdev_io = STAILQ_FIRST(queue);
|
|
|
|
STAILQ_REMOVE_HEAD(queue, buf_link);
|
2017-05-11 16:48:07 +00:00
|
|
|
if (bdev_io->ch == ch) {
|
|
|
|
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
|
2018-01-05 21:55:38 +00:00
|
|
|
} else {
|
|
|
|
STAILQ_INSERT_TAIL(&tmp, bdev_io, buf_link);
|
2017-05-11 16:48:07 +00:00
|
|
|
}
|
|
|
|
}
|
2018-01-05 21:55:38 +00:00
|
|
|
|
|
|
|
STAILQ_SWAP(&tmp, queue, spdk_bdev_io);
|
2017-05-25 20:11:33 +00:00
|
|
|
}
|
2017-05-11 16:48:07 +00:00
|
|
|
|
2017-09-12 19:14:48 +00:00
|
|
|
/*
|
|
|
|
* Abort I/O that are queued waiting for submission. These types of I/O are
|
|
|
|
* linked using the spdk_bdev_io link TAILQ_ENTRY.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
_spdk_bdev_abort_queued_io(bdev_io_tailq_t *queue, struct spdk_bdev_channel *ch)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_io *bdev_io, *tmp;
|
|
|
|
|
|
|
|
TAILQ_FOREACH_SAFE(bdev_io, queue, link, tmp) {
|
|
|
|
if (bdev_io->ch == ch) {
|
|
|
|
TAILQ_REMOVE(queue, bdev_io, link);
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O oustanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
/*
|
|
|
|
* spdk_bdev_io_complete() assumes that the completed I/O had
|
|
|
|
* been submitted to the bdev module. Since in this case it
|
|
|
|
* hadn't, bump io_outstanding to account for the decrement
|
|
|
|
* that spdk_bdev_io_complete() will do.
|
|
|
|
*/
|
|
|
|
if (bdev_io->type != SPDK_BDEV_IO_TYPE_RESET) {
|
2018-03-16 12:20:55 +00:00
|
|
|
ch->io_outstanding++;
|
2018-01-14 11:48:01 +00:00
|
|
|
ch->module_ch->io_outstanding++;
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O oustanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
}
|
2017-09-12 19:14:48 +00:00
|
|
|
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-25 20:11:33 +00:00
|
|
|
static void
|
2018-01-21 23:03:27 +00:00
|
|
|
_spdk_bdev_channel_destroy(struct spdk_bdev_channel *ch)
|
2017-05-25 20:11:33 +00:00
|
|
|
{
|
2017-05-10 21:42:45 +00:00
|
|
|
struct spdk_bdev_mgmt_channel *mgmt_channel;
|
2018-04-05 22:33:48 +00:00
|
|
|
struct spdk_bdev_module_channel *module_ch = ch->module_ch;
|
2017-05-10 21:42:45 +00:00
|
|
|
|
|
|
|
mgmt_channel = spdk_io_channel_get_ctx(ch->mgmt_channel);
|
2017-05-25 20:11:33 +00:00
|
|
|
|
2017-09-12 19:14:48 +00:00
|
|
|
_spdk_bdev_abort_queued_io(&ch->queued_resets, ch);
|
2017-12-28 02:18:12 +00:00
|
|
|
_spdk_bdev_abort_queued_io(&ch->qos_io, ch);
|
2018-04-05 22:33:48 +00:00
|
|
|
_spdk_bdev_abort_queued_io(&module_ch->nomem_io, ch);
|
2017-09-12 19:14:48 +00:00
|
|
|
_spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_small, ch);
|
|
|
|
_spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_large, ch);
|
2017-04-04 21:01:54 +00:00
|
|
|
|
2018-02-04 20:39:55 +00:00
|
|
|
_spdk_bdev_channel_destroy_resource(ch);
|
2017-04-04 21:01:54 +00:00
|
|
|
}
|
|
|
|
|
2017-12-28 03:11:55 +00:00
|
|
|
static void
|
|
|
|
spdk_bdev_qos_channel_destroy(void *ctx)
|
|
|
|
{
|
2018-03-09 21:46:47 +00:00
|
|
|
struct spdk_bdev_channel *qos_channel = ctx;
|
2017-12-28 03:11:55 +00:00
|
|
|
|
2018-03-09 21:46:47 +00:00
|
|
|
_spdk_bdev_channel_destroy(qos_channel);
|
2017-12-28 03:11:55 +00:00
|
|
|
|
2018-03-09 21:46:47 +00:00
|
|
|
spdk_poller_unregister(&qos_channel->qos_poller);
|
|
|
|
free(qos_channel);
|
2017-12-28 03:11:55 +00:00
|
|
|
}
|
|
|
|
|
2018-01-21 23:03:27 +00:00
|
|
|
static void
|
|
|
|
spdk_bdev_channel_destroy(void *io_device, void *ctx_buf)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_channel *ch = ctx_buf;
|
2017-12-28 03:11:55 +00:00
|
|
|
struct spdk_bdev *bdev = ch->bdev;
|
2018-01-21 23:03:27 +00:00
|
|
|
|
|
|
|
_spdk_bdev_channel_destroy(ch);
|
2017-12-28 03:11:55 +00:00
|
|
|
|
|
|
|
pthread_mutex_lock(&bdev->mutex);
|
|
|
|
bdev->channel_count--;
|
2018-03-09 21:46:47 +00:00
|
|
|
if (bdev->channel_count == 0 && bdev->qos_channel != NULL) {
|
|
|
|
/* All I/O channels for this bdev have been destroyed - destroy the QoS channel. */
|
|
|
|
spdk_thread_send_msg(bdev->qos_thread, spdk_bdev_qos_channel_destroy,
|
|
|
|
bdev->qos_channel);
|
2017-12-28 03:11:55 +00:00
|
|
|
|
2018-03-09 21:46:47 +00:00
|
|
|
/*
|
|
|
|
* Set qos_channel to NULL within the critical section so that
|
|
|
|
* if another channel is created, it will see qos_channel == NULL and
|
|
|
|
* re-create the QoS channel even if the asynchronous qos_channel_destroy
|
|
|
|
* isn't finished yet.
|
|
|
|
*/
|
|
|
|
bdev->qos_channel = NULL;
|
|
|
|
bdev->qos_thread = NULL;
|
2017-12-28 03:11:55 +00:00
|
|
|
}
|
2018-03-09 21:46:47 +00:00
|
|
|
pthread_mutex_unlock(&bdev->mutex);
|
2018-01-21 23:03:27 +00:00
|
|
|
}
|
|
|
|
|
2017-11-29 15:13:17 +00:00
|
|
|
int
|
|
|
|
spdk_bdev_alias_add(struct spdk_bdev *bdev, const char *alias)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_alias *tmp;
|
|
|
|
|
|
|
|
if (alias == NULL) {
|
|
|
|
SPDK_ERRLOG("Empty alias passed\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (spdk_bdev_get_by_name(alias)) {
|
|
|
|
SPDK_ERRLOG("Bdev name/alias: %s already exists\n", alias);
|
|
|
|
return -EEXIST;
|
|
|
|
}
|
|
|
|
|
|
|
|
tmp = calloc(1, sizeof(*tmp));
|
|
|
|
if (tmp == NULL) {
|
|
|
|
SPDK_ERRLOG("Unable to allocate alias\n");
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
tmp->alias = strdup(alias);
|
|
|
|
if (tmp->alias == NULL) {
|
|
|
|
free(tmp);
|
|
|
|
SPDK_ERRLOG("Unable to allocate alias\n");
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
TAILQ_INSERT_TAIL(&bdev->aliases, tmp, tailq);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
spdk_bdev_alias_del(struct spdk_bdev *bdev, const char *alias)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_alias *tmp;
|
|
|
|
|
|
|
|
TAILQ_FOREACH(tmp, &bdev->aliases, tailq) {
|
|
|
|
if (strcmp(alias, tmp->alias) == 0) {
|
|
|
|
TAILQ_REMOVE(&bdev->aliases, tmp, tailq);
|
|
|
|
free(tmp->alias);
|
|
|
|
free(tmp);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
SPDK_INFOLOG(SPDK_LOG_BDEV, "Alias %s does not exists\n", alias);
|
|
|
|
|
|
|
|
return -ENOENT;
|
|
|
|
}
|
|
|
|
|
2016-09-16 19:53:32 +00:00
|
|
|
struct spdk_io_channel *
|
2017-06-29 18:23:50 +00:00
|
|
|
spdk_bdev_get_io_channel(struct spdk_bdev_desc *desc)
|
2016-09-16 19:53:32 +00:00
|
|
|
{
|
2018-02-19 22:21:15 +00:00
|
|
|
return spdk_get_io_channel(__bdev_to_io_dev(desc->bdev));
|
2016-09-16 19:53:32 +00:00
|
|
|
}
|
|
|
|
|
2017-05-10 20:29:31 +00:00
|
|
|
const char *
|
|
|
|
spdk_bdev_get_name(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return bdev->name;
|
|
|
|
}
|
|
|
|
|
|
|
|
const char *
|
|
|
|
spdk_bdev_get_product_name(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return bdev->product_name;
|
|
|
|
}
|
|
|
|
|
2017-12-07 10:40:18 +00:00
|
|
|
const struct spdk_bdev_aliases_list *
|
|
|
|
spdk_bdev_get_aliases(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return &bdev->aliases;
|
|
|
|
}
|
|
|
|
|
2017-05-12 17:29:00 +00:00
|
|
|
uint32_t
|
|
|
|
spdk_bdev_get_block_size(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return bdev->blocklen;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t
|
|
|
|
spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return bdev->blockcnt;
|
|
|
|
}
|
|
|
|
|
2017-05-09 17:57:32 +00:00
|
|
|
size_t
|
|
|
|
spdk_bdev_get_buf_align(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
/* TODO: push this logic down to the bdev modules */
|
|
|
|
if (bdev->need_aligned_buffer) {
|
|
|
|
return bdev->blocklen;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2017-08-16 20:56:10 +00:00
|
|
|
uint32_t
|
|
|
|
spdk_bdev_get_optimal_io_boundary(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return bdev->optimal_io_boundary;
|
|
|
|
}
|
|
|
|
|
2017-05-16 19:57:33 +00:00
|
|
|
bool
|
|
|
|
spdk_bdev_has_write_cache(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return bdev->write_cache;
|
|
|
|
}
|
|
|
|
|
2018-03-02 01:27:44 +00:00
|
|
|
const struct spdk_uuid *
|
|
|
|
spdk_bdev_get_uuid(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return &bdev->uuid;
|
|
|
|
}
|
|
|
|
|
2018-01-26 10:00:36 +00:00
|
|
|
int
|
|
|
|
spdk_bdev_notify_blockcnt_change(struct spdk_bdev *bdev, uint64_t size)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
pthread_mutex_lock(&bdev->mutex);
|
|
|
|
|
|
|
|
/* bdev has open descriptors */
|
|
|
|
if (!TAILQ_EMPTY(&bdev->open_descs) &&
|
|
|
|
bdev->blockcnt > size) {
|
|
|
|
ret = -EBUSY;
|
|
|
|
} else {
|
|
|
|
bdev->blockcnt = size;
|
|
|
|
ret = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
pthread_mutex_unlock(&bdev->mutex);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
/*
|
|
|
|
* Convert I/O offset and length from bytes to blocks.
|
|
|
|
*
|
|
|
|
* Returns zero on success or non-zero if the byte parameters aren't divisible by the block size.
|
|
|
|
*/
|
|
|
|
static uint64_t
|
|
|
|
spdk_bdev_bytes_to_blocks(struct spdk_bdev *bdev, uint64_t offset_bytes, uint64_t *offset_blocks,
|
|
|
|
uint64_t num_bytes, uint64_t *num_blocks)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2017-08-28 21:55:35 +00:00
|
|
|
uint32_t block_size = bdev->blocklen;
|
2017-08-04 21:18:13 +00:00
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
*offset_blocks = offset_bytes / block_size;
|
|
|
|
*num_blocks = num_bytes / block_size;
|
|
|
|
|
|
|
|
return (offset_bytes % block_size) | (num_bytes % block_size);
|
|
|
|
}
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
static bool
|
|
|
|
spdk_bdev_io_valid_blocks(struct spdk_bdev *bdev, uint64_t offset_blocks, uint64_t num_blocks)
|
|
|
|
{
|
|
|
|
/* Return failure if offset_blocks + num_blocks is less than offset_blocks; indicates there
|
2016-07-20 18:16:23 +00:00
|
|
|
* has been an overflow and hence the offset has been wrapped around */
|
2017-08-28 21:55:35 +00:00
|
|
|
if (offset_blocks + num_blocks < offset_blocks) {
|
2017-08-04 21:15:46 +00:00
|
|
|
return false;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
/* Return failure if offset_blocks + num_blocks exceeds the size of the bdev */
|
|
|
|
if (offset_blocks + num_blocks > bdev->blockcnt) {
|
2017-08-04 21:15:46 +00:00
|
|
|
return false;
|
2016-10-18 13:45:01 +00:00
|
|
|
}
|
|
|
|
|
2017-08-04 21:15:46 +00:00
|
|
|
return true;
|
2016-10-18 13:45:01 +00:00
|
|
|
}
|
|
|
|
|
2017-06-05 18:39:38 +00:00
|
|
|
int
|
2017-07-06 19:39:19 +00:00
|
|
|
spdk_bdev_read(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
2016-10-18 13:45:01 +00:00
|
|
|
void *buf, uint64_t offset, uint64_t nbytes,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2017-08-28 21:55:35 +00:00
|
|
|
{
|
|
|
|
uint64_t offset_blocks, num_blocks;
|
|
|
|
|
|
|
|
if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return spdk_bdev_read_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
void *buf, uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2016-10-18 13:45:01 +00:00
|
|
|
{
|
2017-07-06 19:39:19 +00:00
|
|
|
struct spdk_bdev *bdev = desc->bdev;
|
2016-10-18 13:45:01 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
2017-04-04 21:01:54 +00:00
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
2016-10-18 13:45:01 +00:00
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
|
2017-06-05 18:39:38 +00:00
|
|
|
return -EINVAL;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-12-20 15:29:52 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel->mgmt_channel);
|
2016-07-20 18:16:23 +00:00
|
|
|
if (!bdev_io) {
|
|
|
|
SPDK_ERRLOG("spdk_bdev_io memory allocation failed duing read\n");
|
2017-06-05 18:39:38 +00:00
|
|
|
return -ENOMEM;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-05-04 20:18:03 +00:00
|
|
|
bdev_io->ch = channel;
|
2016-07-20 18:16:23 +00:00
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
|
2017-09-20 13:10:17 +00:00
|
|
|
bdev_io->u.bdev.iov.iov_base = buf;
|
|
|
|
bdev_io->u.bdev.iov.iov_len = num_blocks * bdev->blocklen;
|
|
|
|
bdev_io->u.bdev.iovs = &bdev_io->u.bdev.iov;
|
|
|
|
bdev_io->u.bdev.iovcnt = 1;
|
|
|
|
bdev_io->u.bdev.num_blocks = num_blocks;
|
|
|
|
bdev_io->u.bdev.offset_blocks = offset_blocks;
|
2016-10-04 14:39:27 +00:00
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
2017-09-07 21:51:14 +00:00
|
|
|
spdk_bdev_io_submit(bdev_io);
|
2017-06-05 18:39:38 +00:00
|
|
|
return 0;
|
2016-10-04 14:39:27 +00:00
|
|
|
}
|
|
|
|
|
2017-06-05 18:39:38 +00:00
|
|
|
int
|
2017-07-06 19:39:19 +00:00
|
|
|
spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
2016-10-04 14:39:27 +00:00
|
|
|
struct iovec *iov, int iovcnt,
|
|
|
|
uint64_t offset, uint64_t nbytes,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2017-08-28 21:55:35 +00:00
|
|
|
{
|
|
|
|
uint64_t offset_blocks, num_blocks;
|
|
|
|
|
|
|
|
if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return spdk_bdev_readv_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg);
|
|
|
|
}
|
|
|
|
|
|
|
|
int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
struct iovec *iov, int iovcnt,
|
|
|
|
uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2016-10-04 14:39:27 +00:00
|
|
|
{
|
2017-07-06 19:39:19 +00:00
|
|
|
struct spdk_bdev *bdev = desc->bdev;
|
2016-10-04 14:39:27 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
2017-04-04 21:01:54 +00:00
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
2016-10-04 14:39:27 +00:00
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
|
2017-06-05 18:39:38 +00:00
|
|
|
return -EINVAL;
|
2016-10-04 14:39:27 +00:00
|
|
|
}
|
|
|
|
|
2017-12-20 15:29:52 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel->mgmt_channel);
|
2016-10-04 14:39:27 +00:00
|
|
|
if (!bdev_io) {
|
|
|
|
SPDK_ERRLOG("spdk_bdev_io memory allocation failed duing read\n");
|
2017-06-05 18:39:38 +00:00
|
|
|
return -ENOMEM;
|
2016-10-04 14:39:27 +00:00
|
|
|
}
|
|
|
|
|
2017-05-04 20:18:03 +00:00
|
|
|
bdev_io->ch = channel;
|
2016-10-04 14:39:27 +00:00
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
|
2017-09-20 13:10:17 +00:00
|
|
|
bdev_io->u.bdev.iovs = iov;
|
|
|
|
bdev_io->u.bdev.iovcnt = iovcnt;
|
|
|
|
bdev_io->u.bdev.num_blocks = num_blocks;
|
|
|
|
bdev_io->u.bdev.offset_blocks = offset_blocks;
|
2016-07-20 18:16:23 +00:00
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
2017-09-07 21:51:14 +00:00
|
|
|
spdk_bdev_io_submit(bdev_io);
|
2017-06-05 18:39:38 +00:00
|
|
|
return 0;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-06-05 18:39:38 +00:00
|
|
|
int
|
2017-07-06 19:39:19 +00:00
|
|
|
spdk_bdev_write(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
2016-09-14 17:34:48 +00:00
|
|
|
void *buf, uint64_t offset, uint64_t nbytes,
|
2016-07-20 18:16:23 +00:00
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2017-08-28 21:55:35 +00:00
|
|
|
{
|
|
|
|
uint64_t offset_blocks, num_blocks;
|
|
|
|
|
|
|
|
if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return spdk_bdev_write_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
spdk_bdev_write_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
void *buf, uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2017-07-06 19:39:19 +00:00
|
|
|
struct spdk_bdev *bdev = desc->bdev;
|
2016-07-20 18:16:23 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
2017-04-04 21:01:54 +00:00
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-07-06 19:39:19 +00:00
|
|
|
if (!desc->write) {
|
|
|
|
return -EBADF;
|
|
|
|
}
|
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
|
2017-06-05 18:39:38 +00:00
|
|
|
return -EINVAL;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-12-20 15:29:52 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel->mgmt_channel);
|
2016-07-20 18:16:23 +00:00
|
|
|
if (!bdev_io) {
|
2017-07-13 04:08:53 +00:00
|
|
|
SPDK_ERRLOG("bdev_io memory allocation failed duing write\n");
|
2017-06-05 18:39:38 +00:00
|
|
|
return -ENOMEM;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-05-04 20:18:03 +00:00
|
|
|
bdev_io->ch = channel;
|
2016-07-20 18:16:23 +00:00
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
|
2017-09-20 13:10:17 +00:00
|
|
|
bdev_io->u.bdev.iov.iov_base = buf;
|
|
|
|
bdev_io->u.bdev.iov.iov_len = num_blocks * bdev->blocklen;
|
|
|
|
bdev_io->u.bdev.iovs = &bdev_io->u.bdev.iov;
|
|
|
|
bdev_io->u.bdev.iovcnt = 1;
|
|
|
|
bdev_io->u.bdev.num_blocks = num_blocks;
|
|
|
|
bdev_io->u.bdev.offset_blocks = offset_blocks;
|
2016-07-20 18:16:23 +00:00
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
2017-09-07 21:51:14 +00:00
|
|
|
spdk_bdev_io_submit(bdev_io);
|
2017-06-05 18:39:38 +00:00
|
|
|
return 0;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-06-05 18:39:38 +00:00
|
|
|
int
|
2017-07-06 19:39:19 +00:00
|
|
|
spdk_bdev_writev(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
2016-07-20 18:16:23 +00:00
|
|
|
struct iovec *iov, int iovcnt,
|
2016-09-14 17:34:48 +00:00
|
|
|
uint64_t offset, uint64_t len,
|
2016-07-20 18:16:23 +00:00
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2017-08-28 21:55:35 +00:00
|
|
|
{
|
|
|
|
uint64_t offset_blocks, num_blocks;
|
|
|
|
|
|
|
|
if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, len, &num_blocks) != 0) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return spdk_bdev_writev_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
struct iovec *iov, int iovcnt,
|
|
|
|
uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2017-07-06 19:39:19 +00:00
|
|
|
struct spdk_bdev *bdev = desc->bdev;
|
2016-07-20 18:16:23 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
2017-04-04 21:01:54 +00:00
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-07-06 19:39:19 +00:00
|
|
|
if (!desc->write) {
|
|
|
|
return -EBADF;
|
|
|
|
}
|
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
|
2017-06-05 18:39:38 +00:00
|
|
|
return -EINVAL;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-12-20 15:29:52 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel->mgmt_channel);
|
2016-07-20 18:16:23 +00:00
|
|
|
if (!bdev_io) {
|
|
|
|
SPDK_ERRLOG("bdev_io memory allocation failed duing writev\n");
|
2017-06-05 18:39:38 +00:00
|
|
|
return -ENOMEM;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-05-04 20:18:03 +00:00
|
|
|
bdev_io->ch = channel;
|
2016-07-20 18:16:23 +00:00
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
|
2017-09-20 13:10:17 +00:00
|
|
|
bdev_io->u.bdev.iovs = iov;
|
|
|
|
bdev_io->u.bdev.iovcnt = iovcnt;
|
|
|
|
bdev_io->u.bdev.num_blocks = num_blocks;
|
|
|
|
bdev_io->u.bdev.offset_blocks = offset_blocks;
|
2016-07-20 18:16:23 +00:00
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
2017-09-07 21:51:14 +00:00
|
|
|
spdk_bdev_io_submit(bdev_io);
|
2017-06-05 18:39:38 +00:00
|
|
|
return 0;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-08-01 18:28:29 +00:00
|
|
|
int
|
|
|
|
spdk_bdev_write_zeroes(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
uint64_t offset, uint64_t len,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2017-08-28 21:55:35 +00:00
|
|
|
{
|
|
|
|
uint64_t offset_blocks, num_blocks;
|
|
|
|
|
|
|
|
if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, len, &num_blocks) != 0) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return spdk_bdev_write_zeroes_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
spdk_bdev_write_zeroes_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2017-08-01 18:28:29 +00:00
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev = desc->bdev;
|
|
|
|
struct spdk_bdev_io *bdev_io;
|
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
2017-07-28 22:34:24 +00:00
|
|
|
uint64_t len;
|
|
|
|
bool split_request = false;
|
|
|
|
|
|
|
|
if (num_blocks > UINT64_MAX / spdk_bdev_get_block_size(bdev)) {
|
|
|
|
SPDK_ERRLOG("length argument out of range in write_zeroes\n");
|
|
|
|
return -ERANGE;
|
|
|
|
}
|
2017-08-01 18:28:29 +00:00
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
|
2017-08-01 18:28:29 +00:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2017-12-20 15:29:52 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel->mgmt_channel);
|
2017-07-28 22:34:24 +00:00
|
|
|
|
2017-08-01 18:28:29 +00:00
|
|
|
if (!bdev_io) {
|
|
|
|
SPDK_ERRLOG("bdev_io memory allocation failed duing write_zeroes\n");
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
bdev_io->ch = channel;
|
2017-09-20 13:10:17 +00:00
|
|
|
bdev_io->u.bdev.offset_blocks = offset_blocks;
|
2017-08-01 18:28:29 +00:00
|
|
|
|
2017-07-28 22:34:24 +00:00
|
|
|
if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
|
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE_ZEROES;
|
|
|
|
bdev_io->u.bdev.num_blocks = num_blocks;
|
|
|
|
bdev_io->u.bdev.iovs = NULL;
|
|
|
|
bdev_io->u.bdev.iovcnt = 0;
|
|
|
|
|
|
|
|
} else {
|
|
|
|
assert(spdk_bdev_get_block_size(bdev) <= ZERO_BUFFER_SIZE);
|
|
|
|
|
|
|
|
len = spdk_bdev_get_block_size(bdev) * num_blocks;
|
|
|
|
|
|
|
|
if (len > ZERO_BUFFER_SIZE) {
|
|
|
|
split_request = true;
|
|
|
|
len = ZERO_BUFFER_SIZE;
|
|
|
|
}
|
2017-08-01 18:28:29 +00:00
|
|
|
|
2017-07-28 22:34:24 +00:00
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
|
|
|
|
bdev_io->u.bdev.iov.iov_base = g_bdev_mgr.zero_buffer;
|
|
|
|
bdev_io->u.bdev.iov.iov_len = len;
|
|
|
|
bdev_io->u.bdev.iovs = &bdev_io->u.bdev.iov;
|
|
|
|
bdev_io->u.bdev.iovcnt = 1;
|
|
|
|
bdev_io->u.bdev.num_blocks = len / spdk_bdev_get_block_size(bdev);
|
2018-03-16 01:20:14 +00:00
|
|
|
bdev_io->u.bdev.split_remaining_num_blocks = num_blocks - bdev_io->u.bdev.num_blocks;
|
|
|
|
bdev_io->u.bdev.split_current_offset_blocks = offset_blocks + bdev_io->u.bdev.num_blocks;
|
2017-07-28 22:34:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (split_request) {
|
2018-03-16 01:20:14 +00:00
|
|
|
bdev_io->u.bdev.stored_user_cb = cb;
|
2017-07-28 22:34:24 +00:00
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, spdk_bdev_write_zeroes_split);
|
|
|
|
} else {
|
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
}
|
2017-09-07 21:51:14 +00:00
|
|
|
spdk_bdev_io_submit(bdev_io);
|
2017-08-01 18:28:29 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-06-05 18:39:38 +00:00
|
|
|
int
|
2017-07-06 19:39:19 +00:00
|
|
|
spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
2017-07-19 21:32:04 +00:00
|
|
|
uint64_t offset, uint64_t nbytes,
|
2016-07-20 18:16:23 +00:00
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2017-08-28 21:55:35 +00:00
|
|
|
{
|
|
|
|
uint64_t offset_blocks, num_blocks;
|
|
|
|
|
|
|
|
if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return spdk_bdev_unmap_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
spdk_bdev_unmap_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2017-07-06 19:39:19 +00:00
|
|
|
struct spdk_bdev *bdev = desc->bdev;
|
2016-07-20 18:16:23 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
2017-04-04 21:01:54 +00:00
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-07-06 19:39:19 +00:00
|
|
|
if (!desc->write) {
|
|
|
|
return -EBADF;
|
|
|
|
}
|
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
|
2017-06-05 18:39:38 +00:00
|
|
|
return -EINVAL;
|
2016-10-05 20:57:49 +00:00
|
|
|
}
|
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
if (num_blocks == 0) {
|
2017-07-19 21:32:04 +00:00
|
|
|
SPDK_ERRLOG("Can't unmap 0 bytes\n");
|
2017-06-05 18:39:38 +00:00
|
|
|
return -EINVAL;
|
2016-10-05 20:57:49 +00:00
|
|
|
}
|
|
|
|
|
2017-12-20 15:29:52 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel->mgmt_channel);
|
2016-07-20 18:16:23 +00:00
|
|
|
if (!bdev_io) {
|
|
|
|
SPDK_ERRLOG("bdev_io memory allocation failed duing unmap\n");
|
2017-06-05 18:39:38 +00:00
|
|
|
return -ENOMEM;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-05-04 20:18:03 +00:00
|
|
|
bdev_io->ch = channel;
|
2016-07-20 18:16:23 +00:00
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_UNMAP;
|
2017-10-18 13:16:02 +00:00
|
|
|
bdev_io->u.bdev.iov.iov_base = NULL;
|
|
|
|
bdev_io->u.bdev.iov.iov_len = 0;
|
2017-09-08 13:07:10 +00:00
|
|
|
bdev_io->u.bdev.iovs = &bdev_io->u.bdev.iov;
|
|
|
|
bdev_io->u.bdev.iovcnt = 1;
|
2017-09-20 13:10:17 +00:00
|
|
|
bdev_io->u.bdev.offset_blocks = offset_blocks;
|
|
|
|
bdev_io->u.bdev.num_blocks = num_blocks;
|
2016-07-20 18:16:23 +00:00
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
2017-09-07 21:51:14 +00:00
|
|
|
spdk_bdev_io_submit(bdev_io);
|
2017-06-05 18:39:38 +00:00
|
|
|
return 0;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-06-05 18:39:38 +00:00
|
|
|
int
|
2017-07-06 19:39:19 +00:00
|
|
|
spdk_bdev_flush(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
2016-07-20 18:16:23 +00:00
|
|
|
uint64_t offset, uint64_t length,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2017-08-28 21:55:35 +00:00
|
|
|
{
|
|
|
|
uint64_t offset_blocks, num_blocks;
|
|
|
|
|
|
|
|
if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, length, &num_blocks) != 0) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return spdk_bdev_flush_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
spdk_bdev_flush_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2017-07-06 19:39:19 +00:00
|
|
|
struct spdk_bdev *bdev = desc->bdev;
|
2016-07-20 18:16:23 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
2017-04-04 21:01:54 +00:00
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-07-06 19:39:19 +00:00
|
|
|
if (!desc->write) {
|
|
|
|
return -EBADF;
|
|
|
|
}
|
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2017-12-20 15:29:52 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel->mgmt_channel);
|
2016-07-20 18:16:23 +00:00
|
|
|
if (!bdev_io) {
|
|
|
|
SPDK_ERRLOG("bdev_io memory allocation failed duing flush\n");
|
2017-06-05 18:39:38 +00:00
|
|
|
return -ENOMEM;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-05-04 20:18:03 +00:00
|
|
|
bdev_io->ch = channel;
|
2016-07-20 18:16:23 +00:00
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_FLUSH;
|
2017-09-08 13:07:10 +00:00
|
|
|
bdev_io->u.bdev.iovs = NULL;
|
|
|
|
bdev_io->u.bdev.iovcnt = 0;
|
2017-09-20 13:10:17 +00:00
|
|
|
bdev_io->u.bdev.offset_blocks = offset_blocks;
|
|
|
|
bdev_io->u.bdev.num_blocks = num_blocks;
|
2016-07-20 18:16:23 +00:00
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
2017-09-07 21:51:14 +00:00
|
|
|
spdk_bdev_io_submit(bdev_io);
|
2017-06-05 18:39:38 +00:00
|
|
|
return 0;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-05-25 20:11:33 +00:00
|
|
|
static void
|
2017-12-11 22:14:19 +00:00
|
|
|
_spdk_bdev_reset_dev(struct spdk_io_channel_iter *i, int status)
|
2017-05-25 20:11:33 +00:00
|
|
|
{
|
2017-12-11 22:14:19 +00:00
|
|
|
struct spdk_bdev_channel *ch = spdk_io_channel_iter_get_ctx(i);
|
2017-09-12 16:34:55 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
2017-05-25 20:11:33 +00:00
|
|
|
|
2017-09-12 16:34:55 +00:00
|
|
|
bdev_io = TAILQ_FIRST(&ch->queued_resets);
|
|
|
|
TAILQ_REMOVE(&ch->queued_resets, bdev_io, link);
|
2017-09-14 21:02:09 +00:00
|
|
|
spdk_bdev_io_submit_reset(bdev_io);
|
2017-05-25 20:11:33 +00:00
|
|
|
}
|
|
|
|
|
2017-12-11 22:14:19 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_reset_freeze_channel(struct spdk_io_channel_iter *i)
|
2017-05-25 20:11:33 +00:00
|
|
|
{
|
2018-03-02 19:49:36 +00:00
|
|
|
struct spdk_io_channel *ch;
|
2017-05-25 20:11:33 +00:00
|
|
|
struct spdk_bdev_channel *channel;
|
2017-05-10 21:42:45 +00:00
|
|
|
struct spdk_bdev_mgmt_channel *mgmt_channel;
|
2018-04-05 22:33:48 +00:00
|
|
|
struct spdk_bdev_module_channel *module_ch;
|
2017-05-25 20:11:33 +00:00
|
|
|
|
2017-12-11 22:14:19 +00:00
|
|
|
ch = spdk_io_channel_iter_get_channel(i);
|
2017-05-25 20:11:33 +00:00
|
|
|
channel = spdk_io_channel_get_ctx(ch);
|
2017-05-10 21:42:45 +00:00
|
|
|
mgmt_channel = spdk_io_channel_get_ctx(channel->mgmt_channel);
|
2018-04-05 22:33:48 +00:00
|
|
|
module_ch = channel->module_ch;
|
2017-05-25 20:11:33 +00:00
|
|
|
|
2017-09-08 18:44:50 +00:00
|
|
|
channel->flags |= BDEV_CH_RESET_IN_PROGRESS;
|
|
|
|
|
2018-04-05 22:33:48 +00:00
|
|
|
_spdk_bdev_abort_queued_io(&module_ch->nomem_io, channel);
|
2017-12-28 02:18:12 +00:00
|
|
|
_spdk_bdev_abort_queued_io(&channel->qos_io, channel);
|
2017-09-12 19:14:48 +00:00
|
|
|
_spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_small, channel);
|
|
|
|
_spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_large, channel);
|
2017-11-16 07:42:37 +00:00
|
|
|
|
2017-12-11 22:14:19 +00:00
|
|
|
spdk_for_each_channel_continue(i, 0);
|
2017-05-25 20:11:33 +00:00
|
|
|
}
|
|
|
|
|
2017-12-28 09:25:15 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_reset_freeze_qos_channel(void *ctx)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev = ctx;
|
|
|
|
struct spdk_bdev_mgmt_channel *mgmt_channel = NULL;
|
|
|
|
struct spdk_bdev_channel *qos_channel = bdev->qos_channel;
|
2018-04-05 22:33:48 +00:00
|
|
|
struct spdk_bdev_module_channel *module_ch = NULL;
|
2017-12-28 09:25:15 +00:00
|
|
|
|
|
|
|
if (qos_channel) {
|
2018-04-05 22:33:48 +00:00
|
|
|
module_ch = qos_channel->module_ch;
|
2017-12-28 09:25:15 +00:00
|
|
|
mgmt_channel = spdk_io_channel_get_ctx(qos_channel->mgmt_channel);
|
|
|
|
|
|
|
|
qos_channel->flags |= BDEV_CH_RESET_IN_PROGRESS;
|
|
|
|
|
2018-04-05 22:33:48 +00:00
|
|
|
_spdk_bdev_abort_queued_io(&module_ch->nomem_io, qos_channel);
|
2017-12-28 09:25:15 +00:00
|
|
|
_spdk_bdev_abort_queued_io(&qos_channel->qos_io, qos_channel);
|
|
|
|
_spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_small, qos_channel);
|
|
|
|
_spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_large, qos_channel);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-06-15 14:17:12 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_start_reset(void *ctx)
|
|
|
|
{
|
2017-09-12 16:34:55 +00:00
|
|
|
struct spdk_bdev_channel *ch = ctx;
|
2017-06-15 14:17:12 +00:00
|
|
|
|
2018-02-19 22:21:15 +00:00
|
|
|
spdk_for_each_channel(__bdev_to_io_dev(ch->bdev), _spdk_bdev_reset_freeze_channel,
|
2017-09-12 16:34:55 +00:00
|
|
|
ch, _spdk_bdev_reset_dev);
|
2017-06-15 14:17:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2017-09-08 19:34:49 +00:00
|
|
|
_spdk_bdev_channel_start_reset(struct spdk_bdev_channel *ch)
|
2017-06-15 14:17:12 +00:00
|
|
|
{
|
2017-09-08 19:34:49 +00:00
|
|
|
struct spdk_bdev *bdev = ch->bdev;
|
2017-06-15 14:17:12 +00:00
|
|
|
|
2017-09-08 19:34:49 +00:00
|
|
|
assert(!TAILQ_EMPTY(&ch->queued_resets));
|
2017-06-15 14:17:12 +00:00
|
|
|
|
2017-09-08 19:34:49 +00:00
|
|
|
pthread_mutex_lock(&bdev->mutex);
|
2017-09-12 19:14:48 +00:00
|
|
|
if (bdev->reset_in_progress == NULL) {
|
|
|
|
bdev->reset_in_progress = TAILQ_FIRST(&ch->queued_resets);
|
2017-09-12 16:34:55 +00:00
|
|
|
/*
|
|
|
|
* Take a channel reference for the target bdev for the life of this
|
|
|
|
* reset. This guards against the channel getting destroyed while
|
|
|
|
* spdk_for_each_channel() calls related to this reset IO are in
|
|
|
|
* progress. We will release the reference when this reset is
|
|
|
|
* completed.
|
|
|
|
*/
|
2018-02-19 22:21:15 +00:00
|
|
|
bdev->reset_in_progress->u.reset.ch_ref = spdk_get_io_channel(__bdev_to_io_dev(bdev));
|
2017-09-12 16:34:55 +00:00
|
|
|
_spdk_bdev_start_reset(ch);
|
2017-06-15 14:17:12 +00:00
|
|
|
}
|
|
|
|
pthread_mutex_unlock(&bdev->mutex);
|
|
|
|
}
|
|
|
|
|
2016-07-20 18:16:23 +00:00
|
|
|
int
|
2017-07-06 19:39:19 +00:00
|
|
|
spdk_bdev_reset(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
2016-07-20 18:16:23 +00:00
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
|
|
|
{
|
2017-07-06 19:39:19 +00:00
|
|
|
struct spdk_bdev *bdev = desc->bdev;
|
2016-07-20 18:16:23 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
2017-05-23 17:51:50 +00:00
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-12-20 15:29:52 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel->mgmt_channel);
|
2016-07-20 18:16:23 +00:00
|
|
|
if (!bdev_io) {
|
|
|
|
SPDK_ERRLOG("bdev_io memory allocation failed duing reset\n");
|
2017-09-26 21:45:22 +00:00
|
|
|
return -ENOMEM;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-05-23 17:51:50 +00:00
|
|
|
bdev_io->ch = channel;
|
2016-07-20 18:16:23 +00:00
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_RESET;
|
2017-09-12 16:34:55 +00:00
|
|
|
bdev_io->u.reset.ch_ref = NULL;
|
2016-07-20 18:16:23 +00:00
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
2017-06-15 14:17:12 +00:00
|
|
|
pthread_mutex_lock(&bdev->mutex);
|
2017-09-08 19:34:49 +00:00
|
|
|
TAILQ_INSERT_TAIL(&channel->queued_resets, bdev_io, link);
|
2017-06-15 14:17:12 +00:00
|
|
|
pthread_mutex_unlock(&bdev->mutex);
|
|
|
|
|
2017-09-08 19:34:49 +00:00
|
|
|
_spdk_bdev_channel_start_reset(channel);
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-12-28 09:25:15 +00:00
|
|
|
/* Explicitly handle the QoS bdev channel as no IO channel associated */
|
|
|
|
if (bdev->qos_thread) {
|
|
|
|
spdk_thread_send_msg(bdev->qos_thread,
|
|
|
|
_spdk_bdev_reset_freeze_qos_channel, bdev);
|
|
|
|
}
|
|
|
|
|
2017-05-25 20:11:33 +00:00
|
|
|
return 0;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-04-06 21:40:29 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_get_io_stat(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
|
|
|
|
struct spdk_bdev_io_stat *stat)
|
|
|
|
{
|
2017-05-30 08:45:46 +00:00
|
|
|
#ifdef SPDK_CONFIG_VTUNE
|
|
|
|
SPDK_ERRLOG("Calling spdk_bdev_get_io_stat is not allowed when VTune integration is enabled.\n");
|
|
|
|
memset(stat, 0, sizeof(*stat));
|
|
|
|
return;
|
|
|
|
#endif
|
2017-04-06 21:40:29 +00:00
|
|
|
|
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
|
|
|
|
2017-12-06 19:02:51 +00:00
|
|
|
channel->stat.ticks_rate = spdk_get_ticks_hz();
|
2017-04-06 21:40:29 +00:00
|
|
|
*stat = channel->stat;
|
|
|
|
memset(&channel->stat, 0, sizeof(channel->stat));
|
|
|
|
}
|
|
|
|
|
2017-06-05 18:39:38 +00:00
|
|
|
int
|
2017-07-06 19:39:19 +00:00
|
|
|
spdk_bdev_nvme_admin_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
2017-05-13 20:12:13 +00:00
|
|
|
const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
|
|
|
{
|
2017-07-06 19:39:19 +00:00
|
|
|
struct spdk_bdev *bdev = desc->bdev;
|
2017-05-13 20:12:13 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
|
|
|
|
2017-07-06 19:39:19 +00:00
|
|
|
if (!desc->write) {
|
|
|
|
return -EBADF;
|
|
|
|
}
|
|
|
|
|
2017-12-20 15:29:52 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel->mgmt_channel);
|
2017-05-13 20:12:13 +00:00
|
|
|
if (!bdev_io) {
|
|
|
|
SPDK_ERRLOG("bdev_io memory allocation failed during nvme_admin_passthru\n");
|
2017-06-05 18:39:38 +00:00
|
|
|
return -ENOMEM;
|
2017-05-13 20:12:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bdev_io->ch = channel;
|
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_ADMIN;
|
|
|
|
bdev_io->u.nvme_passthru.cmd = *cmd;
|
|
|
|
bdev_io->u.nvme_passthru.buf = buf;
|
|
|
|
bdev_io->u.nvme_passthru.nbytes = nbytes;
|
2017-11-14 06:33:11 +00:00
|
|
|
bdev_io->u.nvme_passthru.md_buf = NULL;
|
|
|
|
bdev_io->u.nvme_passthru.md_len = 0;
|
2017-05-13 20:12:13 +00:00
|
|
|
|
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
2017-09-07 21:51:14 +00:00
|
|
|
spdk_bdev_io_submit(bdev_io);
|
2017-06-05 18:39:38 +00:00
|
|
|
return 0;
|
2017-05-13 20:12:13 +00:00
|
|
|
}
|
|
|
|
|
2017-06-05 18:39:38 +00:00
|
|
|
int
|
2017-07-06 19:39:19 +00:00
|
|
|
spdk_bdev_nvme_io_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
2017-06-05 18:02:09 +00:00
|
|
|
const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
|
|
|
{
|
2017-07-06 19:39:19 +00:00
|
|
|
struct spdk_bdev *bdev = desc->bdev;
|
2017-06-05 18:02:09 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
|
|
|
|
2017-07-06 19:39:19 +00:00
|
|
|
if (!desc->write) {
|
|
|
|
/*
|
|
|
|
* Do not try to parse the NVMe command - we could maybe use bits in the opcode
|
|
|
|
* to easily determine if the command is a read or write, but for now just
|
|
|
|
* do not allow io_passthru with a read-only descriptor.
|
|
|
|
*/
|
|
|
|
return -EBADF;
|
|
|
|
}
|
|
|
|
|
2017-12-20 15:29:52 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel->mgmt_channel);
|
2017-06-05 18:02:09 +00:00
|
|
|
if (!bdev_io) {
|
|
|
|
SPDK_ERRLOG("bdev_io memory allocation failed during nvme_admin_passthru\n");
|
2017-06-05 18:39:38 +00:00
|
|
|
return -ENOMEM;
|
2017-06-05 18:02:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bdev_io->ch = channel;
|
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IO;
|
|
|
|
bdev_io->u.nvme_passthru.cmd = *cmd;
|
|
|
|
bdev_io->u.nvme_passthru.buf = buf;
|
|
|
|
bdev_io->u.nvme_passthru.nbytes = nbytes;
|
2017-11-14 06:33:11 +00:00
|
|
|
bdev_io->u.nvme_passthru.md_buf = NULL;
|
|
|
|
bdev_io->u.nvme_passthru.md_len = 0;
|
|
|
|
|
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
|
|
|
spdk_bdev_io_submit(bdev_io);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
spdk_bdev_nvme_io_passthru_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev = desc->bdev;
|
|
|
|
struct spdk_bdev_io *bdev_io;
|
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
|
|
|
|
|
|
|
if (!desc->write) {
|
|
|
|
/*
|
|
|
|
* Do not try to parse the NVMe command - we could maybe use bits in the opcode
|
|
|
|
* to easily determine if the command is a read or write, but for now just
|
|
|
|
* do not allow io_passthru with a read-only descriptor.
|
|
|
|
*/
|
|
|
|
return -EBADF;
|
|
|
|
}
|
|
|
|
|
2017-12-20 15:29:52 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel->mgmt_channel);
|
2017-11-14 06:33:11 +00:00
|
|
|
if (!bdev_io) {
|
|
|
|
SPDK_ERRLOG("bdev_io memory allocation failed during nvme_admin_passthru\n");
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
bdev_io->ch = channel;
|
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IO_MD;
|
|
|
|
bdev_io->u.nvme_passthru.cmd = *cmd;
|
|
|
|
bdev_io->u.nvme_passthru.buf = buf;
|
|
|
|
bdev_io->u.nvme_passthru.nbytes = nbytes;
|
|
|
|
bdev_io->u.nvme_passthru.md_buf = md_buf;
|
|
|
|
bdev_io->u.nvme_passthru.md_len = md_len;
|
2017-06-05 18:02:09 +00:00
|
|
|
|
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
2017-09-07 21:51:14 +00:00
|
|
|
spdk_bdev_io_submit(bdev_io);
|
2017-06-05 18:39:38 +00:00
|
|
|
return 0;
|
2017-06-05 18:02:09 +00:00
|
|
|
}
|
|
|
|
|
2016-07-20 18:16:23 +00:00
|
|
|
int
|
|
|
|
spdk_bdev_free_io(struct spdk_bdev_io *bdev_io)
|
|
|
|
{
|
|
|
|
if (!bdev_io) {
|
|
|
|
SPDK_ERRLOG("bdev_io is NULL\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING) {
|
|
|
|
SPDK_ERRLOG("bdev_io is in pending state\n");
|
2017-05-04 19:31:57 +00:00
|
|
|
assert(false);
|
2016-07-20 18:16:23 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2017-01-11 23:38:11 +00:00
|
|
|
spdk_bdev_put_io(bdev_io);
|
|
|
|
|
|
|
|
return 0;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O oustanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_ch_retry_io(struct spdk_bdev_channel *bdev_ch)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev = bdev_ch->bdev;
|
2018-04-05 22:33:48 +00:00
|
|
|
struct spdk_bdev_module_channel *module_ch = bdev_ch->module_ch;
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O oustanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
|
|
|
|
2018-04-05 22:33:48 +00:00
|
|
|
if (module_ch->io_outstanding > module_ch->nomem_threshold) {
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O oustanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
/*
|
|
|
|
* Allow some more I/O to complete before retrying the nomem_io queue.
|
|
|
|
* Some drivers (such as nvme) cannot immediately take a new I/O in
|
|
|
|
* the context of a completion, because the resources for the I/O are
|
|
|
|
* not released until control returns to the bdev poller. Also, we
|
|
|
|
* may require several small I/O to complete before a larger I/O
|
|
|
|
* (that requires splitting) can be submitted.
|
|
|
|
*/
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2018-04-05 22:33:48 +00:00
|
|
|
while (!TAILQ_EMPTY(&module_ch->nomem_io)) {
|
|
|
|
bdev_io = TAILQ_FIRST(&module_ch->nomem_io);
|
|
|
|
TAILQ_REMOVE(&module_ch->nomem_io, bdev_io, link);
|
2018-03-16 12:20:55 +00:00
|
|
|
bdev_io->ch->io_outstanding++;
|
2018-04-05 22:33:48 +00:00
|
|
|
module_ch->io_outstanding++;
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O oustanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
bdev_io->status = SPDK_BDEV_IO_STATUS_PENDING;
|
2018-01-14 11:48:01 +00:00
|
|
|
bdev->fn_table->submit_request(bdev_io->ch->channel, bdev_io);
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O oustanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
if (bdev_io->status == SPDK_BDEV_IO_STATUS_NOMEM) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-03-16 18:17:31 +00:00
|
|
|
static inline void
|
2017-06-15 18:48:27 +00:00
|
|
|
_spdk_bdev_io_complete(void *ctx)
|
2017-01-12 17:58:20 +00:00
|
|
|
{
|
2017-06-12 22:47:30 +00:00
|
|
|
struct spdk_bdev_io *bdev_io = ctx;
|
2017-01-12 17:58:20 +00:00
|
|
|
|
2018-03-16 18:17:31 +00:00
|
|
|
if (spdk_unlikely(bdev_io->in_submit_request || bdev_io->io_submit_ch)) {
|
|
|
|
/*
|
|
|
|
* Send the completion to the thread that originally submitted the I/O,
|
|
|
|
* which may not be the current thread in the case of QoS.
|
|
|
|
*/
|
|
|
|
if (bdev_io->io_submit_ch) {
|
|
|
|
bdev_io->ch = bdev_io->io_submit_ch;
|
|
|
|
bdev_io->io_submit_ch = NULL;
|
|
|
|
}
|
2017-12-28 03:11:55 +00:00
|
|
|
|
2018-03-16 18:17:31 +00:00
|
|
|
/*
|
|
|
|
* Defer completion to avoid potential infinite recursion if the
|
|
|
|
* user's completion callback issues a new I/O.
|
|
|
|
*/
|
2017-12-28 03:11:55 +00:00
|
|
|
spdk_thread_send_msg(spdk_io_channel_get_thread(bdev_io->ch->channel),
|
2018-03-16 18:17:31 +00:00
|
|
|
_spdk_bdev_io_complete, bdev_io);
|
|
|
|
return;
|
2017-12-28 03:11:55 +00:00
|
|
|
}
|
2018-03-16 18:17:31 +00:00
|
|
|
|
|
|
|
assert(bdev_io->cb != NULL);
|
|
|
|
assert(spdk_get_thread() == spdk_io_channel_get_thread(bdev_io->ch->channel));
|
|
|
|
|
|
|
|
bdev_io->cb(bdev_io, bdev_io->status == SPDK_BDEV_IO_STATUS_SUCCESS,
|
|
|
|
bdev_io->caller_ctx);
|
2017-01-12 17:58:20 +00:00
|
|
|
}
|
|
|
|
|
2017-12-28 09:25:15 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_unfreeze_qos_channel(void *ctx)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev = ctx;
|
|
|
|
|
|
|
|
if (bdev->qos_channel) {
|
|
|
|
bdev->qos_channel->flags &= ~BDEV_CH_RESET_IN_PROGRESS;
|
|
|
|
assert(TAILQ_EMPTY(&bdev->qos_channel->queued_resets));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-12-06 21:52:20 +00:00
|
|
|
static void
|
2017-12-11 22:14:19 +00:00
|
|
|
_spdk_bdev_reset_complete(struct spdk_io_channel_iter *i, int status)
|
2017-12-06 21:52:20 +00:00
|
|
|
{
|
2017-12-11 22:14:19 +00:00
|
|
|
struct spdk_bdev_io *bdev_io = spdk_io_channel_iter_get_ctx(i);
|
2017-12-06 21:52:20 +00:00
|
|
|
|
|
|
|
if (bdev_io->u.reset.ch_ref != NULL) {
|
|
|
|
spdk_put_io_channel(bdev_io->u.reset.ch_ref);
|
|
|
|
bdev_io->u.reset.ch_ref = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
_spdk_bdev_io_complete(bdev_io);
|
|
|
|
}
|
|
|
|
|
2017-12-11 22:14:19 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_unfreeze_channel(struct spdk_io_channel_iter *i)
|
2017-12-06 21:52:20 +00:00
|
|
|
{
|
2017-12-11 22:14:19 +00:00
|
|
|
struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
|
2017-12-06 21:52:20 +00:00
|
|
|
struct spdk_bdev_channel *ch = spdk_io_channel_get_ctx(_ch);
|
|
|
|
|
|
|
|
ch->flags &= ~BDEV_CH_RESET_IN_PROGRESS;
|
|
|
|
if (!TAILQ_EMPTY(&ch->queued_resets)) {
|
|
|
|
_spdk_bdev_channel_start_reset(ch);
|
|
|
|
}
|
|
|
|
|
2017-12-11 22:14:19 +00:00
|
|
|
spdk_for_each_channel_continue(i, 0);
|
2017-12-06 21:52:20 +00:00
|
|
|
}
|
|
|
|
|
2016-07-20 18:16:23 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status)
|
|
|
|
{
|
2017-09-15 22:23:49 +00:00
|
|
|
struct spdk_bdev *bdev = bdev_io->bdev;
|
|
|
|
struct spdk_bdev_channel *bdev_ch = bdev_io->ch;
|
2018-04-05 22:33:48 +00:00
|
|
|
struct spdk_bdev_module_channel *module_ch = bdev_ch->module_ch;
|
2017-09-15 22:23:49 +00:00
|
|
|
|
2017-06-12 22:47:30 +00:00
|
|
|
bdev_io->status = status;
|
|
|
|
|
2017-09-14 21:02:09 +00:00
|
|
|
if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_RESET)) {
|
2017-12-06 21:52:20 +00:00
|
|
|
bool unlock_channels = false;
|
|
|
|
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O oustanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
if (status == SPDK_BDEV_IO_STATUS_NOMEM) {
|
|
|
|
SPDK_ERRLOG("NOMEM returned for reset\n");
|
|
|
|
}
|
2017-09-15 22:23:49 +00:00
|
|
|
pthread_mutex_lock(&bdev->mutex);
|
|
|
|
if (bdev_io == bdev->reset_in_progress) {
|
|
|
|
bdev->reset_in_progress = NULL;
|
2017-12-06 21:52:20 +00:00
|
|
|
unlock_channels = true;
|
2017-09-12 19:14:48 +00:00
|
|
|
}
|
2017-09-15 22:23:49 +00:00
|
|
|
pthread_mutex_unlock(&bdev->mutex);
|
2017-12-06 21:52:20 +00:00
|
|
|
|
|
|
|
if (unlock_channels) {
|
2017-12-28 09:25:15 +00:00
|
|
|
/* Explicitly handle the QoS bdev channel as no IO channel associated */
|
|
|
|
if (bdev->qos_thread) {
|
|
|
|
spdk_thread_send_msg(bdev->qos_thread,
|
|
|
|
_spdk_bdev_unfreeze_qos_channel, bdev);
|
|
|
|
}
|
|
|
|
|
2018-02-19 22:21:15 +00:00
|
|
|
spdk_for_each_channel(__bdev_to_io_dev(bdev), _spdk_bdev_unfreeze_channel,
|
|
|
|
bdev_io, _spdk_bdev_reset_complete);
|
2017-12-06 21:52:20 +00:00
|
|
|
return;
|
2017-09-12 16:34:55 +00:00
|
|
|
}
|
2017-09-14 21:02:09 +00:00
|
|
|
} else {
|
2018-03-16 12:20:55 +00:00
|
|
|
assert(bdev_ch->io_outstanding > 0);
|
2018-04-05 22:33:48 +00:00
|
|
|
assert(module_ch->io_outstanding > 0);
|
2018-03-16 12:20:55 +00:00
|
|
|
bdev_ch->io_outstanding--;
|
2018-04-05 22:33:48 +00:00
|
|
|
module_ch->io_outstanding--;
|
2018-03-16 17:40:47 +00:00
|
|
|
|
|
|
|
if (spdk_unlikely(status == SPDK_BDEV_IO_STATUS_NOMEM)) {
|
2018-04-05 22:33:48 +00:00
|
|
|
TAILQ_INSERT_HEAD(&module_ch->nomem_io, bdev_io, link);
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O oustanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
/*
|
|
|
|
* Wait for some of the outstanding I/O to complete before we
|
|
|
|
* retry any of the nomem_io. Normally we will wait for
|
|
|
|
* NOMEM_THRESHOLD_COUNT I/O to complete but for low queue
|
|
|
|
* depth channels we will instead wait for half to complete.
|
|
|
|
*/
|
2018-04-05 22:33:48 +00:00
|
|
|
module_ch->nomem_threshold = spdk_max((int64_t)module_ch->io_outstanding / 2,
|
|
|
|
(int64_t)module_ch->io_outstanding - NOMEM_THRESHOLD_COUNT);
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O oustanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
return;
|
|
|
|
}
|
2018-03-16 17:40:47 +00:00
|
|
|
|
2018-04-05 22:33:48 +00:00
|
|
|
if (spdk_unlikely(!TAILQ_EMPTY(&module_ch->nomem_io))) {
|
2018-03-16 17:40:47 +00:00
|
|
|
_spdk_bdev_ch_retry_io(bdev_ch);
|
|
|
|
}
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-06-12 22:47:30 +00:00
|
|
|
if (status == SPDK_BDEV_IO_STATUS_SUCCESS) {
|
2017-04-06 21:40:29 +00:00
|
|
|
switch (bdev_io->type) {
|
|
|
|
case SPDK_BDEV_IO_TYPE_READ:
|
2017-09-15 22:23:49 +00:00
|
|
|
bdev_ch->stat.bytes_read += bdev_io->u.bdev.num_blocks * bdev->blocklen;
|
|
|
|
bdev_ch->stat.num_read_ops++;
|
2017-12-06 19:02:51 +00:00
|
|
|
bdev_ch->stat.read_latency_ticks += (spdk_get_ticks() - bdev_io->submit_tsc);
|
2017-04-06 21:40:29 +00:00
|
|
|
break;
|
|
|
|
case SPDK_BDEV_IO_TYPE_WRITE:
|
2017-09-15 22:23:49 +00:00
|
|
|
bdev_ch->stat.bytes_written += bdev_io->u.bdev.num_blocks * bdev->blocklen;
|
|
|
|
bdev_ch->stat.num_write_ops++;
|
2017-12-06 19:02:51 +00:00
|
|
|
bdev_ch->stat.write_latency_ticks += (spdk_get_ticks() - bdev_io->submit_tsc);
|
2017-04-06 21:40:29 +00:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-30 08:45:46 +00:00
|
|
|
#ifdef SPDK_CONFIG_VTUNE
|
|
|
|
uint64_t now_tsc = spdk_get_ticks();
|
2017-09-15 22:23:49 +00:00
|
|
|
if (now_tsc > (bdev_ch->start_tsc + bdev_ch->interval_tsc)) {
|
2017-06-15 16:59:02 +00:00
|
|
|
uint64_t data[5];
|
2017-05-30 08:45:46 +00:00
|
|
|
|
2017-09-15 22:23:49 +00:00
|
|
|
data[0] = bdev_ch->stat.num_read_ops;
|
|
|
|
data[1] = bdev_ch->stat.bytes_read;
|
|
|
|
data[2] = bdev_ch->stat.num_write_ops;
|
|
|
|
data[3] = bdev_ch->stat.bytes_written;
|
|
|
|
data[4] = bdev->fn_table->get_spin_time ?
|
|
|
|
bdev->fn_table->get_spin_time(bdev_ch->channel) : 0;
|
2017-05-30 08:45:46 +00:00
|
|
|
|
2017-09-15 22:23:49 +00:00
|
|
|
__itt_metadata_add(g_bdev_mgr.domain, __itt_null, bdev_ch->handle,
|
2017-06-15 16:59:02 +00:00
|
|
|
__itt_metadata_u64, 5, data);
|
2017-05-30 08:45:46 +00:00
|
|
|
|
2017-09-15 22:23:49 +00:00
|
|
|
memset(&bdev_ch->stat, 0, sizeof(bdev_ch->stat));
|
|
|
|
bdev_ch->start_tsc = now_tsc;
|
2017-05-30 08:45:46 +00:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2018-03-16 18:17:31 +00:00
|
|
|
_spdk_bdev_io_complete(bdev_io);
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2016-11-07 20:14:47 +00:00
|
|
|
void
|
2017-01-18 22:20:31 +00:00
|
|
|
spdk_bdev_io_complete_scsi_status(struct spdk_bdev_io *bdev_io, enum spdk_scsi_status sc,
|
|
|
|
enum spdk_scsi_sense sk, uint8_t asc, uint8_t ascq)
|
|
|
|
{
|
|
|
|
if (sc == SPDK_SCSI_STATUS_GOOD) {
|
|
|
|
bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS;
|
|
|
|
} else {
|
|
|
|
bdev_io->status = SPDK_BDEV_IO_STATUS_SCSI_ERROR;
|
|
|
|
bdev_io->error.scsi.sc = sc;
|
|
|
|
bdev_io->error.scsi.sk = sk;
|
|
|
|
bdev_io->error.scsi.asc = asc;
|
|
|
|
bdev_io->error.scsi.ascq = ascq;
|
|
|
|
}
|
|
|
|
|
|
|
|
spdk_bdev_io_complete(bdev_io, bdev_io->status);
|
2016-11-07 20:14:47 +00:00
|
|
|
}
|
|
|
|
|
2017-01-18 22:15:35 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_io_get_scsi_status(const struct spdk_bdev_io *bdev_io,
|
|
|
|
int *sc, int *sk, int *asc, int *ascq)
|
|
|
|
{
|
|
|
|
assert(sc != NULL);
|
|
|
|
assert(sk != NULL);
|
|
|
|
assert(asc != NULL);
|
|
|
|
assert(ascq != NULL);
|
|
|
|
|
|
|
|
switch (bdev_io->status) {
|
|
|
|
case SPDK_BDEV_IO_STATUS_SUCCESS:
|
|
|
|
*sc = SPDK_SCSI_STATUS_GOOD;
|
|
|
|
*sk = SPDK_SCSI_SENSE_NO_SENSE;
|
|
|
|
*asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
|
|
|
|
*ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
|
|
|
|
break;
|
|
|
|
case SPDK_BDEV_IO_STATUS_NVME_ERROR:
|
|
|
|
spdk_scsi_nvme_translate(bdev_io, sc, sk, asc, ascq);
|
|
|
|
break;
|
|
|
|
case SPDK_BDEV_IO_STATUS_SCSI_ERROR:
|
|
|
|
*sc = bdev_io->error.scsi.sc;
|
|
|
|
*sk = bdev_io->error.scsi.sk;
|
|
|
|
*asc = bdev_io->error.scsi.asc;
|
|
|
|
*ascq = bdev_io->error.scsi.ascq;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
*sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
|
|
|
|
*sk = SPDK_SCSI_SENSE_ABORTED_COMMAND;
|
|
|
|
*asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
|
|
|
|
*ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-01-18 21:43:15 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_io_complete_nvme_status(struct spdk_bdev_io *bdev_io, int sct, int sc)
|
|
|
|
{
|
|
|
|
if (sct == SPDK_NVME_SCT_GENERIC && sc == SPDK_NVME_SC_SUCCESS) {
|
|
|
|
bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS;
|
|
|
|
} else {
|
|
|
|
bdev_io->error.nvme.sct = sct;
|
|
|
|
bdev_io->error.nvme.sc = sc;
|
|
|
|
bdev_io->status = SPDK_BDEV_IO_STATUS_NVME_ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
spdk_bdev_io_complete(bdev_io, bdev_io->status);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
spdk_bdev_io_get_nvme_status(const struct spdk_bdev_io *bdev_io, int *sct, int *sc)
|
|
|
|
{
|
|
|
|
assert(sct != NULL);
|
|
|
|
assert(sc != NULL);
|
|
|
|
|
|
|
|
if (bdev_io->status == SPDK_BDEV_IO_STATUS_NVME_ERROR) {
|
|
|
|
*sct = bdev_io->error.nvme.sct;
|
|
|
|
*sc = bdev_io->error.nvme.sc;
|
|
|
|
} else if (bdev_io->status == SPDK_BDEV_IO_STATUS_SUCCESS) {
|
|
|
|
*sct = SPDK_NVME_SCT_GENERIC;
|
|
|
|
*sc = SPDK_NVME_SC_SUCCESS;
|
|
|
|
} else {
|
|
|
|
*sct = SPDK_NVME_SCT_GENERIC;
|
|
|
|
*sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-11 16:47:52 +00:00
|
|
|
struct spdk_thread *
|
|
|
|
spdk_bdev_io_get_thread(struct spdk_bdev_io *bdev_io)
|
|
|
|
{
|
|
|
|
return spdk_io_channel_get_thread(bdev_io->ch->channel);
|
|
|
|
}
|
|
|
|
|
2017-12-28 21:49:46 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_qos_config(struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
struct spdk_conf_section *sp = NULL;
|
|
|
|
const char *val = NULL;
|
|
|
|
int ios_per_sec = 0;
|
|
|
|
int i = 0;
|
|
|
|
|
|
|
|
sp = spdk_conf_find_section(NULL, "QoS");
|
|
|
|
if (!sp) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
val = spdk_conf_section_get_nmval(sp, "Limit_IOPS", i, 0);
|
|
|
|
if (!val) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (strcmp(bdev->name, val) != 0) {
|
|
|
|
i++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
val = spdk_conf_section_get_nmval(sp, "Limit_IOPS", i, 1);
|
|
|
|
if (!val) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
ios_per_sec = (int)strtol(val, NULL, 10);
|
|
|
|
if (ios_per_sec > 0) {
|
|
|
|
if (ios_per_sec % SPDK_BDEV_QOS_MIN_IOS_PER_SEC) {
|
|
|
|
SPDK_ERRLOG("Assigned IOPS %u on bdev %s is not multiple of %u\n",
|
|
|
|
ios_per_sec, bdev->name, SPDK_BDEV_QOS_MIN_IOS_PER_SEC);
|
|
|
|
SPDK_ERRLOG("Failed to enable QoS on this bdev %s\n", bdev->name);
|
|
|
|
} else {
|
|
|
|
bdev->ios_per_sec = (uint64_t)ios_per_sec;
|
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Bdev:%s QoS:%lu\n",
|
|
|
|
bdev->name, bdev->ios_per_sec);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-11-20 09:31:39 +00:00
|
|
|
static int
|
2018-03-23 19:35:21 +00:00
|
|
|
spdk_bdev_init(struct spdk_bdev *bdev)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2017-07-07 00:36:17 +00:00
|
|
|
assert(bdev->module != NULL);
|
|
|
|
|
2017-11-20 09:31:39 +00:00
|
|
|
if (!bdev->name) {
|
|
|
|
SPDK_ERRLOG("Bdev name is NULL\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (spdk_bdev_get_by_name(bdev->name)) {
|
|
|
|
SPDK_ERRLOG("Bdev name:%s already exists\n", bdev->name);
|
|
|
|
return -EEXIST;
|
|
|
|
}
|
|
|
|
|
2017-06-29 18:23:50 +00:00
|
|
|
bdev->status = SPDK_BDEV_STATUS_READY;
|
|
|
|
|
|
|
|
TAILQ_INIT(&bdev->open_descs);
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-11-29 15:13:17 +00:00
|
|
|
TAILQ_INIT(&bdev->aliases);
|
|
|
|
|
2017-09-12 19:14:48 +00:00
|
|
|
bdev->reset_in_progress = NULL;
|
2017-06-15 14:17:12 +00:00
|
|
|
|
2017-12-28 21:49:46 +00:00
|
|
|
_spdk_bdev_qos_config(bdev);
|
|
|
|
|
2018-02-19 22:21:15 +00:00
|
|
|
spdk_io_device_register(__bdev_to_io_dev(bdev),
|
|
|
|
spdk_bdev_channel_create, spdk_bdev_channel_destroy,
|
2017-04-04 21:01:54 +00:00
|
|
|
sizeof(struct spdk_bdev_channel));
|
|
|
|
|
2017-01-10 16:54:23 +00:00
|
|
|
pthread_mutex_init(&bdev->mutex, NULL);
|
2018-03-23 19:35:21 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-04-03 21:27:23 +00:00
|
|
|
static void
|
|
|
|
spdk_bdev_destroy_cb(void *io_device)
|
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
struct spdk_bdev *bdev;
|
|
|
|
spdk_bdev_unregister_cb cb_fn;
|
|
|
|
void *cb_arg;
|
|
|
|
|
|
|
|
bdev = __bdev_from_io_dev(io_device);
|
|
|
|
cb_fn = bdev->unregister_cb;
|
|
|
|
cb_arg = bdev->unregister_ctx;
|
|
|
|
|
|
|
|
rc = bdev->fn_table->destruct(bdev->ctxt);
|
|
|
|
if (rc < 0) {
|
|
|
|
SPDK_ERRLOG("destruct failed\n");
|
|
|
|
}
|
|
|
|
if (rc <= 0 && cb_fn != NULL) {
|
|
|
|
cb_fn(cb_arg, rc);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2018-03-23 19:35:21 +00:00
|
|
|
static void
|
|
|
|
spdk_bdev_fini(struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
pthread_mutex_destroy(&bdev->mutex);
|
|
|
|
|
2018-04-03 21:27:23 +00:00
|
|
|
spdk_io_device_unregister(__bdev_to_io_dev(bdev), spdk_bdev_destroy_cb);
|
2018-03-23 19:35:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
spdk_bdev_start(struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_module *module;
|
|
|
|
|
2017-08-30 18:06:33 +00:00
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Inserting bdev %s into list\n", bdev->name);
|
2017-05-09 21:09:28 +00:00
|
|
|
TAILQ_INSERT_TAIL(&g_bdev_mgr.bdevs, bdev, link);
|
2017-02-10 20:18:49 +00:00
|
|
|
|
2017-07-13 04:06:22 +00:00
|
|
|
TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, tailq) {
|
|
|
|
if (module->examine) {
|
2017-08-24 15:36:25 +00:00
|
|
|
module->action_in_progress++;
|
2017-07-13 04:06:22 +00:00
|
|
|
module->examine(bdev);
|
|
|
|
}
|
2017-02-10 20:18:49 +00:00
|
|
|
}
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-11-20 09:31:39 +00:00
|
|
|
/*
 * Register a bdev: initialize it and, if that succeeds, start it.
 *
 * \return 0 on success, otherwise the negative error code returned by
 * spdk_bdev_init(); in that case the bdev is not started.
 */
int
spdk_bdev_register(struct spdk_bdev *bdev)
{
	int rc;

	rc = spdk_bdev_init(bdev);
	if (rc != 0) {
		return rc;
	}

	spdk_bdev_start(bdev);
	return rc;
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
spdk_vbdev_remove_base_bdevs(struct spdk_bdev *vbdev)
|
|
|
|
{
|
|
|
|
struct spdk_bdev **bdevs;
|
|
|
|
struct spdk_bdev *base;
|
|
|
|
size_t i, j, k;
|
|
|
|
bool found;
|
|
|
|
|
|
|
|
/* Iterate over base bdevs to remove vbdev from them. */
|
|
|
|
for (i = 0; i < vbdev->base_bdevs_cnt; i++) {
|
|
|
|
found = false;
|
|
|
|
base = vbdev->base_bdevs[i];
|
|
|
|
|
|
|
|
for (j = 0; j < base->vbdevs_cnt; j++) {
|
|
|
|
if (base->vbdevs[j] != vbdev) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (k = j; k + 1 < base->vbdevs_cnt; k++) {
|
|
|
|
base->vbdevs[k] = base->vbdevs[k + 1];
|
|
|
|
}
|
|
|
|
|
|
|
|
base->vbdevs_cnt--;
|
|
|
|
if (base->vbdevs_cnt > 0) {
|
|
|
|
bdevs = realloc(base->vbdevs, base->vbdevs_cnt * sizeof(bdevs[0]));
|
|
|
|
/* It would be odd if shrinking memory block fail. */
|
|
|
|
assert(bdevs);
|
|
|
|
base->vbdevs = bdevs;
|
|
|
|
} else {
|
|
|
|
free(base->vbdevs);
|
|
|
|
base->vbdevs = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
found = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!found) {
|
|
|
|
SPDK_WARNLOG("Bdev '%s' is not base bdev of '%s'.\n", base->name, vbdev->name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
free(vbdev->base_bdevs);
|
|
|
|
vbdev->base_bdevs = NULL;
|
|
|
|
vbdev->base_bdevs_cnt = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
spdk_vbdev_set_base_bdevs(struct spdk_bdev *vbdev, struct spdk_bdev **base_bdevs, size_t cnt)
|
|
|
|
{
|
|
|
|
struct spdk_bdev **vbdevs;
|
|
|
|
struct spdk_bdev *base;
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
/* Adding base bdevs isn't supported (yet?). */
|
|
|
|
assert(vbdev->base_bdevs_cnt == 0);
|
|
|
|
|
|
|
|
vbdev->base_bdevs = malloc(cnt * sizeof(vbdev->base_bdevs[0]));
|
|
|
|
if (!vbdev->base_bdevs) {
|
|
|
|
SPDK_ERRLOG("%s - realloc() failed\n", vbdev->name);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
memcpy(vbdev->base_bdevs, base_bdevs, cnt * sizeof(vbdev->base_bdevs[0]));
|
|
|
|
vbdev->base_bdevs_cnt = cnt;
|
|
|
|
|
|
|
|
/* Iterate over base bdevs to add this vbdev to them. */
|
|
|
|
for (i = 0; i < cnt; i++) {
|
|
|
|
base = vbdev->base_bdevs[i];
|
|
|
|
|
|
|
|
assert(base != NULL);
|
|
|
|
assert(base->claim_module != NULL);
|
|
|
|
|
|
|
|
vbdevs = realloc(base->vbdevs, (base->vbdevs_cnt + 1) * sizeof(vbdevs[0]));
|
|
|
|
if (!vbdevs) {
|
|
|
|
SPDK_ERRLOG("%s - realloc() failed\n", base->name);
|
|
|
|
spdk_vbdev_remove_base_bdevs(vbdev);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
vbdevs[base->vbdevs_cnt] = vbdev;
|
|
|
|
base->vbdevs = vbdevs;
|
|
|
|
base->vbdevs_cnt++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
2017-06-29 20:16:26 +00:00
|
|
|
}
|
|
|
|
|
2017-11-20 09:31:39 +00:00
|
|
|
/*
 * Register a virtual bdev on top of zero or more base bdevs.
 *
 * \param vbdev Virtual bdev to register.
 * \param base_bdevs Array of base bdevs (unused when base_bdev_count is 0).
 * \param base_bdev_count Number of entries in base_bdevs.
 *
 * \return 0 on success, otherwise the negative error code from
 * spdk_bdev_init() or spdk_vbdev_set_base_bdevs().
 *
 * NOTE(review): when linking the base bdevs fails, spdk_bdev_fini() is called,
 * which unregisters the io_device with spdk_bdev_destroy_cb; that callback
 * calls fn_table->destruct. Callers handling a failure from this function
 * should confirm they do not release the same context a second time.
 */
int
spdk_vbdev_register(struct spdk_bdev *vbdev, struct spdk_bdev **base_bdevs, int base_bdev_count)
{
	int rc;

	rc = spdk_bdev_init(vbdev);
	if (rc) {
		return rc;
	}

	if (base_bdev_count != 0) {
		rc = spdk_vbdev_set_base_bdevs(vbdev, base_bdevs, base_bdev_count);
		if (rc) {
			spdk_bdev_fini(vbdev);
			return rc;
		}
	}

	spdk_bdev_start(vbdev);
	return 0;
}
|
|
|
|
|
2016-07-20 18:16:23 +00:00
|
|
|
void
|
2017-10-25 09:11:59 +00:00
|
|
|
spdk_bdev_unregister_done(struct spdk_bdev *bdev, int bdeverrno)
|
|
|
|
{
|
|
|
|
if (bdev->unregister_cb != NULL) {
|
|
|
|
bdev->unregister_cb(bdev->unregister_ctx, bdeverrno);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-02-15 23:12:15 +00:00
|
|
|
static void
|
|
|
|
_remove_notify(void *arg)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_desc *desc = arg;
|
|
|
|
|
|
|
|
desc->remove_cb(desc->remove_ctx);
|
|
|
|
}
|
|
|
|
|
2017-10-25 09:11:59 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void *cb_arg)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2017-06-29 18:23:50 +00:00
|
|
|
struct spdk_bdev_desc *desc, *tmp;
|
2017-07-24 16:43:16 +00:00
|
|
|
bool do_destruct = true;
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-08-30 18:06:33 +00:00
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Removing bdev %s from list\n", bdev->name);
|
2017-01-25 01:04:14 +00:00
|
|
|
|
|
|
|
pthread_mutex_lock(&bdev->mutex);
|
2017-06-29 18:23:50 +00:00
|
|
|
|
2018-03-23 19:35:21 +00:00
|
|
|
spdk_vbdev_remove_base_bdevs(bdev);
|
2018-01-22 10:19:55 +00:00
|
|
|
|
2017-06-29 18:23:50 +00:00
|
|
|
bdev->status = SPDK_BDEV_STATUS_REMOVING;
|
2017-10-25 09:11:59 +00:00
|
|
|
bdev->unregister_cb = cb_fn;
|
|
|
|
bdev->unregister_ctx = cb_arg;
|
2017-06-29 18:23:50 +00:00
|
|
|
|
|
|
|
TAILQ_FOREACH_SAFE(desc, &bdev->open_descs, link, tmp) {
|
|
|
|
if (desc->remove_cb) {
|
2017-07-24 16:43:16 +00:00
|
|
|
do_destruct = false;
|
2018-02-15 23:12:15 +00:00
|
|
|
/*
|
|
|
|
* Defer invocation of the remove_cb to a separate message that will
|
|
|
|
* run later on this thread. This ensures this context unwinds and
|
|
|
|
* we don't recursively unregister this bdev again if the remove_cb
|
|
|
|
* immediately closes its descriptor.
|
|
|
|
*/
|
|
|
|
spdk_thread_send_msg(spdk_get_thread(), _remove_notify, desc);
|
2017-01-25 01:04:14 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-07-24 16:43:16 +00:00
|
|
|
if (!do_destruct) {
|
2017-06-29 18:23:50 +00:00
|
|
|
pthread_mutex_unlock(&bdev->mutex);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2017-05-09 21:09:28 +00:00
|
|
|
TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, link);
|
2017-01-25 01:04:14 +00:00
|
|
|
pthread_mutex_unlock(&bdev->mutex);
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2018-03-23 19:35:21 +00:00
|
|
|
spdk_bdev_fini(bdev);
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-06-29 18:23:50 +00:00
|
|
|
int
|
|
|
|
spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_cb,
|
|
|
|
void *remove_ctx, struct spdk_bdev_desc **_desc)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_desc *desc;
|
|
|
|
|
|
|
|
desc = calloc(1, sizeof(*desc));
|
|
|
|
if (desc == NULL) {
|
2017-10-30 07:47:50 +00:00
|
|
|
SPDK_ERRLOG("Failed to allocate memory for bdev descriptor\n");
|
2017-06-29 18:23:50 +00:00
|
|
|
return -ENOMEM;
|
|
|
|
}
|
2017-01-10 16:54:23 +00:00
|
|
|
|
|
|
|
pthread_mutex_lock(&bdev->mutex);
|
|
|
|
|
2017-09-18 12:15:34 +00:00
|
|
|
if (write && bdev->claim_module) {
|
2017-08-30 18:06:33 +00:00
|
|
|
SPDK_INFOLOG(SPDK_LOG_BDEV, "Could not open %s - already claimed\n", bdev->name);
|
2017-06-29 18:23:50 +00:00
|
|
|
free(desc);
|
|
|
|
pthread_mutex_unlock(&bdev->mutex);
|
|
|
|
return -EPERM;
|
2017-01-10 16:54:23 +00:00
|
|
|
}
|
|
|
|
|
2017-06-29 18:23:50 +00:00
|
|
|
TAILQ_INSERT_TAIL(&bdev->open_descs, desc, link);
|
|
|
|
|
|
|
|
desc->bdev = bdev;
|
|
|
|
desc->remove_cb = remove_cb;
|
|
|
|
desc->remove_ctx = remove_ctx;
|
|
|
|
desc->write = write;
|
|
|
|
*_desc = desc;
|
|
|
|
|
2017-01-10 16:54:23 +00:00
|
|
|
pthread_mutex_unlock(&bdev->mutex);
|
|
|
|
|
2017-06-29 18:23:50 +00:00
|
|
|
return 0;
|
2017-01-10 16:54:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2017-06-29 18:23:50 +00:00
|
|
|
spdk_bdev_close(struct spdk_bdev_desc *desc)
|
2017-01-10 16:54:23 +00:00
|
|
|
{
|
2017-06-29 18:23:50 +00:00
|
|
|
struct spdk_bdev *bdev = desc->bdev;
|
2017-01-25 01:04:14 +00:00
|
|
|
bool do_unregister = false;
|
2017-01-10 16:54:23 +00:00
|
|
|
|
2017-01-25 01:04:14 +00:00
|
|
|
pthread_mutex_lock(&bdev->mutex);
|
2017-06-29 18:23:50 +00:00
|
|
|
|
|
|
|
TAILQ_REMOVE(&bdev->open_descs, desc, link);
|
|
|
|
free(desc);
|
|
|
|
|
2017-07-17 10:00:08 +00:00
|
|
|
if (bdev->status == SPDK_BDEV_STATUS_REMOVING && TAILQ_EMPTY(&bdev->open_descs)) {
|
2017-01-25 01:04:14 +00:00
|
|
|
do_unregister = true;
|
|
|
|
}
|
2017-01-10 16:54:23 +00:00
|
|
|
pthread_mutex_unlock(&bdev->mutex);
|
2017-01-25 01:04:14 +00:00
|
|
|
|
|
|
|
if (do_unregister == true) {
|
2017-10-25 09:11:59 +00:00
|
|
|
spdk_bdev_unregister(bdev, bdev->unregister_cb, bdev->unregister_ctx);
|
2017-01-25 01:04:14 +00:00
|
|
|
}
|
2017-01-10 16:54:23 +00:00
|
|
|
}
|
|
|
|
|
2017-07-07 23:04:52 +00:00
|
|
|
int
|
2017-07-13 04:06:22 +00:00
|
|
|
spdk_bdev_module_claim_bdev(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
|
2018-03-09 22:20:21 +00:00
|
|
|
struct spdk_bdev_module *module)
|
2017-07-07 23:04:52 +00:00
|
|
|
{
|
2017-07-13 04:06:22 +00:00
|
|
|
if (bdev->claim_module != NULL) {
|
2017-07-07 23:04:52 +00:00
|
|
|
SPDK_ERRLOG("bdev %s already claimed by module %s\n", bdev->name,
|
2017-07-13 04:06:22 +00:00
|
|
|
bdev->claim_module->name);
|
2017-07-07 23:04:52 +00:00
|
|
|
return -EPERM;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (desc && !desc->write) {
|
|
|
|
desc->write = true;
|
|
|
|
}
|
|
|
|
|
2017-07-13 04:06:22 +00:00
|
|
|
bdev->claim_module = module;
|
2017-07-07 23:04:52 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2017-07-13 04:06:22 +00:00
|
|
|
spdk_bdev_module_release_bdev(struct spdk_bdev *bdev)
|
2017-07-07 23:04:52 +00:00
|
|
|
{
|
2017-07-13 04:06:22 +00:00
|
|
|
assert(bdev->claim_module != NULL);
|
|
|
|
bdev->claim_module = NULL;
|
2017-07-07 23:04:52 +00:00
|
|
|
}
|
|
|
|
|
2017-07-06 19:39:19 +00:00
|
|
|
struct spdk_bdev *
|
|
|
|
spdk_bdev_desc_get_bdev(struct spdk_bdev_desc *desc)
|
|
|
|
{
|
|
|
|
return desc->bdev;
|
|
|
|
}
|
|
|
|
|
2017-05-04 20:57:07 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *iovcntp)
|
|
|
|
{
|
|
|
|
struct iovec *iovs;
|
|
|
|
int iovcnt;
|
|
|
|
|
|
|
|
if (bdev_io == NULL) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (bdev_io->type) {
|
|
|
|
case SPDK_BDEV_IO_TYPE_READ:
|
2017-09-20 13:10:17 +00:00
|
|
|
iovs = bdev_io->u.bdev.iovs;
|
|
|
|
iovcnt = bdev_io->u.bdev.iovcnt;
|
2017-05-04 20:57:07 +00:00
|
|
|
break;
|
|
|
|
case SPDK_BDEV_IO_TYPE_WRITE:
|
2017-09-20 13:10:17 +00:00
|
|
|
iovs = bdev_io->u.bdev.iovs;
|
|
|
|
iovcnt = bdev_io->u.bdev.iovcnt;
|
2017-05-04 20:57:07 +00:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
iovs = NULL;
|
|
|
|
iovcnt = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (iovp) {
|
|
|
|
*iovp = iovs;
|
|
|
|
}
|
|
|
|
if (iovcntp) {
|
|
|
|
*iovcntp = iovcnt;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-09 21:01:12 +00:00
|
|
|
void
|
2018-03-09 22:20:21 +00:00
|
|
|
spdk_bdev_module_list_add(struct spdk_bdev_module *bdev_module)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2018-03-06 18:52:46 +00:00
|
|
|
|
|
|
|
if (spdk_bdev_module_list_find(bdev_module->name)) {
|
|
|
|
fprintf(stderr, "ERROR: module '%s' already registered.\n", bdev_module->name);
|
|
|
|
assert(false);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bdev_module->async_init) {
|
|
|
|
bdev_module->action_in_progress = 1;
|
|
|
|
}
|
|
|
|
|
2017-07-13 04:06:22 +00:00
|
|
|
/*
|
|
|
|
* Modules with examine callbacks must be initialized first, so they are
|
|
|
|
* ready to handle examine callbacks from later modules that will
|
|
|
|
* register physical bdevs.
|
|
|
|
*/
|
|
|
|
if (bdev_module->examine != NULL) {
|
|
|
|
TAILQ_INSERT_HEAD(&g_bdev_mgr.bdev_modules, bdev_module, tailq);
|
|
|
|
} else {
|
|
|
|
TAILQ_INSERT_TAIL(&g_bdev_mgr.bdev_modules, bdev_module, tailq);
|
|
|
|
}
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
2017-08-25 21:22:46 +00:00
|
|
|
|
2018-03-09 22:20:21 +00:00
|
|
|
struct spdk_bdev_module *
|
2018-03-06 18:52:46 +00:00
|
|
|
spdk_bdev_module_list_find(const char *name)
|
|
|
|
{
|
2018-03-09 22:20:21 +00:00
|
|
|
struct spdk_bdev_module *bdev_module;
|
2018-03-06 18:52:46 +00:00
|
|
|
|
|
|
|
TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) {
|
|
|
|
if (strcmp(name, bdev_module->name) == 0) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return bdev_module;
|
|
|
|
}
|
|
|
|
|
2017-07-28 22:34:24 +00:00
|
|
|
static void
|
|
|
|
spdk_bdev_write_zeroes_split(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
|
|
|
|
{
|
|
|
|
uint64_t len;
|
|
|
|
|
|
|
|
if (!success) {
|
2018-03-16 01:20:14 +00:00
|
|
|
bdev_io->cb = bdev_io->u.bdev.stored_user_cb;
|
2017-10-23 20:43:50 +00:00
|
|
|
_spdk_bdev_io_complete(bdev_io);
|
2017-07-28 22:34:24 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* no need to perform the error checking from write_zeroes_blocks because this request already passed those checks. */
|
2018-03-16 01:20:14 +00:00
|
|
|
len = spdk_min(spdk_bdev_get_block_size(bdev_io->bdev) * bdev_io->u.bdev.split_remaining_num_blocks,
|
2017-07-28 22:34:24 +00:00
|
|
|
ZERO_BUFFER_SIZE);
|
|
|
|
|
2018-03-16 01:20:14 +00:00
|
|
|
bdev_io->u.bdev.offset_blocks = bdev_io->u.bdev.split_current_offset_blocks;
|
2017-07-28 22:34:24 +00:00
|
|
|
bdev_io->u.bdev.iov.iov_len = len;
|
|
|
|
bdev_io->u.bdev.num_blocks = len / spdk_bdev_get_block_size(bdev_io->bdev);
|
2018-03-16 01:20:14 +00:00
|
|
|
bdev_io->u.bdev.split_remaining_num_blocks -= bdev_io->u.bdev.num_blocks;
|
|
|
|
bdev_io->u.bdev.split_current_offset_blocks += bdev_io->u.bdev.num_blocks;
|
2017-07-28 22:34:24 +00:00
|
|
|
|
|
|
|
/* if this round completes the i/o, change the callback to be the original user callback */
|
2018-03-16 01:20:14 +00:00
|
|
|
if (bdev_io->u.bdev.split_remaining_num_blocks == 0) {
|
|
|
|
spdk_bdev_io_init(bdev_io, bdev_io->bdev, cb_arg, bdev_io->u.bdev.stored_user_cb);
|
2017-07-28 22:34:24 +00:00
|
|
|
} else {
|
|
|
|
spdk_bdev_io_init(bdev_io, bdev_io->bdev, cb_arg, spdk_bdev_write_zeroes_split);
|
|
|
|
}
|
|
|
|
spdk_bdev_io_submit(bdev_io);
|
|
|
|
}
|
|
|
|
|
2017-08-30 18:06:33 +00:00
|
|
|
/*
 * Associates the component name "bdev" with SPDK_LOG_BDEV, the flag passed by
 * the SPDK_DEBUGLOG()/SPDK_INFOLOG() calls in this file.
 * NOTE(review): the macro is defined outside this file - confirm its exact
 * semantics against the SPDK log header.
 */
SPDK_LOG_REGISTER_COMPONENT("bdev", SPDK_LOG_BDEV)
|