2016-07-20 18:16:23 +00:00
|
|
|
/*-
|
|
|
|
* BSD LICENSE
|
|
|
|
*
|
|
|
|
* Copyright (c) Intel Corporation.
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
*
|
|
|
|
* * Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in
|
|
|
|
* the documentation and/or other materials provided with the
|
|
|
|
* distribution.
|
|
|
|
* * Neither the name of Intel Corporation nor the names of its
|
|
|
|
* contributors may be used to endorse or promote products derived
|
|
|
|
* from this software without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
2017-05-02 18:18:25 +00:00
|
|
|
#include "spdk/stdinc.h"
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-05-02 18:18:25 +00:00
|
|
|
#include "spdk/bdev.h"
|
2017-12-28 21:49:46 +00:00
|
|
|
#include "spdk/conf.h"
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2018-09-27 19:38:15 +00:00
|
|
|
#include "spdk/config.h"
|
2017-03-27 19:59:40 +00:00
|
|
|
#include "spdk/env.h"
|
2017-10-25 13:58:02 +00:00
|
|
|
#include "spdk/event.h"
|
2018-06-11 20:32:15 +00:00
|
|
|
#include "spdk/thread.h"
|
2017-05-09 21:32:49 +00:00
|
|
|
#include "spdk/likely.h"
|
2016-07-20 18:16:23 +00:00
|
|
|
#include "spdk/queue.h"
|
2017-01-18 21:43:15 +00:00
|
|
|
#include "spdk/nvme_spec.h"
|
2017-01-18 22:15:35 +00:00
|
|
|
#include "spdk/scsi_spec.h"
|
2019-03-22 11:19:42 +00:00
|
|
|
#include "spdk/notify.h"
|
2017-08-29 05:24:52 +00:00
|
|
|
#include "spdk/util.h"
|
2018-08-31 20:13:32 +00:00
|
|
|
#include "spdk/trace.h"
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2018-05-23 21:01:03 +00:00
|
|
|
#include "spdk/bdev_module.h"
|
2016-11-07 22:10:28 +00:00
|
|
|
#include "spdk_internal/log.h"
|
2017-05-30 08:45:46 +00:00
|
|
|
#include "spdk/string.h"
|
|
|
|
|
|
|
|
#ifdef SPDK_CONFIG_VTUNE
|
|
|
|
#include "ittnotify.h"
|
2017-06-15 16:59:02 +00:00
|
|
|
#include "ittnotify_types.h"
|
|
|
|
int __itt_init_ittlib(const char *, __itt_group_id);
|
2017-05-30 08:45:46 +00:00
|
|
|
#endif
|
2016-11-07 22:10:28 +00:00
|
|
|
|
2019-03-22 18:46:31 +00:00
|
|
|
/* Default values used to populate g_bdev_opts (bdev_io pool size and bdev_io cache size). */
#define SPDK_BDEV_IO_POOL_SIZE			(64 * 1024 - 1)
#define SPDK_BDEV_IO_CACHE_SIZE			256

/* NOTE(review): presumably the element counts of the small/large data buffer pools - confirm at pool creation. */
#define BUF_SMALL_POOL_SIZE			8191
#define BUF_LARGE_POOL_SIZE			1023

/* NOTE(review): presumably used to derive spdk_bdev_shared_resource.nomem_threshold - confirm at the use site. */
#define NOMEM_THRESHOLD_COUNT			8

/* 1 MiB. NOTE(review): presumably the size of g_bdev_mgr.zero_buffer - confirm at the allocation site. */
#define ZERO_BUFFER_SIZE			0x100000

/* Trace owner/object/group identifiers and the two bdev I/O tracepoints built from them. */
#define OWNER_BDEV				0x2

#define OBJECT_BDEV_IO				0x2

#define TRACE_GROUP_BDEV			0x3
#define TRACE_BDEV_IO_START			SPDK_TPOINT_ID(TRACE_GROUP_BDEV, 0x0)
#define TRACE_BDEV_IO_DONE			SPDK_TPOINT_ID(TRACE_GROUP_BDEV, 0x1)

/* QoS tunables: timeslice length in microseconds and per-timeslice / per-second minimums. */
#define SPDK_BDEV_QOS_TIMESLICE_IN_USEC		1000
#define SPDK_BDEV_QOS_MIN_IO_PER_TIMESLICE	1
#define SPDK_BDEV_QOS_MIN_BYTE_PER_TIMESLICE	512
#define SPDK_BDEV_QOS_MIN_IOS_PER_SEC		1000
#define SPDK_BDEV_QOS_MIN_BYTES_PER_SEC		(1024 * 1024)
/* Sentinel limit value meaning that no rate limit has been configured. */
#define SPDK_BDEV_QOS_LIMIT_NOT_DEFINED		UINT64_MAX

/* Extra bytes budgeted per pool buffer for alignment (see the size check in spdk_bdev_io_put_buf()). */
#define SPDK_BDEV_POOL_ALIGNMENT		512
|
|
|
|
|
2018-06-22 02:15:02 +00:00
|
|
|
/*
 * Names of the four QoS rate limit types, in the same order in both tables.
 * NOTE(review): presumably qos_conf_type holds the configuration-file keys and
 * qos_rpc_type the RPC parameter names - confirm against their users.
 */
static const char *qos_conf_type[] = {
	"Limit_IOPS",
	"Limit_BPS",
	"Limit_Read_BPS",
	"Limit_Write_BPS"
};
static const char *qos_rpc_type[] = {
	"rw_ios_per_sec",
	"rw_mbytes_per_sec",
	"r_mbytes_per_sec",
	"w_mbytes_per_sec"
};
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2018-07-30 21:09:07 +00:00
|
|
|
/* Tail-queue head type for a list of struct spdk_bdev. */
TAILQ_HEAD(spdk_bdev_list, spdk_bdev);

/*
 * Global state of the bdev layer. A single instance, g_bdev_mgr, is defined
 * in this file.
 */
struct spdk_bdev_mgr {
	/* NOTE(review): presumably the mempool that backs struct spdk_bdev_io - confirm. */
	struct spdk_mempool *bdev_io_pool;

	/* Data buffer pools; spdk_bdev_io_put_buf() returns buffers to one of these. */
	struct spdk_mempool *buf_small_pool;
	struct spdk_mempool *buf_large_pool;

	/* NOTE(review): presumably a ZERO_BUFFER_SIZE-byte zero-filled buffer - confirm. */
	void *zero_buffer;

	/* List of bdev modules. */
	TAILQ_HEAD(bdev_module_list, spdk_bdev_module) bdev_modules;

	/* List of bdevs, walked by spdk_bdev_first() and spdk_bdev_next(). */
	struct spdk_bdev_list bdevs;

	/* Initialization progress flags; both start out false. */
	bool init_complete;
	bool module_init_complete;

	pthread_mutex_t mutex;

#ifdef SPDK_CONFIG_VTUNE
	__itt_domain	*domain;
#endif
};
|
|
|
|
|
|
|
|
static struct spdk_bdev_mgr g_bdev_mgr = {
|
|
|
|
.bdev_modules = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdev_modules),
|
|
|
|
.bdevs = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdevs),
|
2017-07-10 23:36:35 +00:00
|
|
|
.init_complete = false,
|
|
|
|
.module_init_complete = false,
|
2019-06-18 12:18:32 +00:00
|
|
|
.mutex = PTHREAD_MUTEX_INITIALIZER,
|
2017-05-09 21:09:28 +00:00
|
|
|
};
|
2016-08-01 21:31:02 +00:00
|
|
|
|
2019-06-18 12:18:32 +00:00
|
|
|
|
2018-06-11 12:35:11 +00:00
|
|
|
static struct spdk_bdev_opts g_bdev_opts = {
|
|
|
|
.bdev_io_pool_size = SPDK_BDEV_IO_POOL_SIZE,
|
|
|
|
.bdev_io_cache_size = SPDK_BDEV_IO_CACHE_SIZE,
|
|
|
|
};
|
|
|
|
|
2017-10-25 13:58:02 +00:00
|
|
|
static spdk_bdev_init_cb g_init_cb_fn = NULL;
|
|
|
|
static void *g_init_cb_arg = NULL;
|
|
|
|
|
|
|
|
static spdk_bdev_fini_cb g_fini_cb_fn = NULL;
|
|
|
|
static void *g_fini_cb_arg = NULL;
|
2017-11-21 17:45:27 +00:00
|
|
|
static struct spdk_thread *g_fini_thread = NULL;
|
2017-05-26 04:58:04 +00:00
|
|
|
|
2018-09-04 15:01:51 +00:00
|
|
|
/* State of a single QoS rate limit (one entry per rate limit type). */
struct spdk_bdev_qos_limit {
	/** Number of IOs or bytes permitted per second. */
	uint64_t limit;

	/**
	 * IOs or bytes still available in the current timeslice (e.g., 1ms).
	 * The byte count may go negative: an I/O submitted while some bytes
	 * remain is let through even if it is larger than what is left, and
	 * the overshoot is charged against the next timeslice.
	 */
	int64_t remaining_this_timeslice;

	/** Lower bound on the IOs or bytes issued in one timeslice (e.g., 1ms). */
	uint32_t min_per_timeslice;

	/** Upper bound on the IOs or bytes issued in one timeslice (e.g., 1ms). */
	uint32_t max_per_timeslice;

	/** Decides whether the given IO has to be queued. */
	bool (*queue_io)(const struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io);

	/** Accounts for an IO that has been submitted. */
	void (*update_quota)(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io);
};
|
|
|
|
|
|
|
|
struct spdk_bdev_qos {
|
|
|
|
/** Types of structure of rate limits. */
|
|
|
|
struct spdk_bdev_qos_limit rate_limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES];
|
2018-04-24 22:44:14 +00:00
|
|
|
|
2018-09-04 15:01:51 +00:00
|
|
|
/** The channel that all I/O are funneled through. */
|
2018-04-24 22:44:14 +00:00
|
|
|
struct spdk_bdev_channel *ch;
|
|
|
|
|
|
|
|
/** The thread on which the poller is running. */
|
|
|
|
struct spdk_thread *thread;
|
|
|
|
|
|
|
|
/** Queue of I/O waiting to be issued. */
|
|
|
|
bdev_io_tailq_t queued;
|
|
|
|
|
2018-08-27 19:17:31 +00:00
|
|
|
/** Size of a timeslice in tsc ticks. */
|
|
|
|
uint64_t timeslice_size;
|
|
|
|
|
|
|
|
/** Timestamp of start of last timeslice. */
|
|
|
|
uint64_t last_timeslice;
|
|
|
|
|
2018-08-24 09:09:27 +00:00
|
|
|
/** Poller that processes queued I/O commands each time slice. */
|
2018-04-24 22:44:14 +00:00
|
|
|
struct spdk_poller *poller;
|
|
|
|
};
|
|
|
|
|
2017-05-09 22:07:56 +00:00
|
|
|
struct spdk_bdev_mgmt_channel {
|
2018-01-05 21:55:38 +00:00
|
|
|
bdev_io_stailq_t need_buf_small;
|
|
|
|
bdev_io_stailq_t need_buf_large;
|
2017-12-20 15:20:23 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Each thread keeps a cache of bdev_io - this allows
|
|
|
|
* bdev threads which are *not* DPDK threads to still
|
|
|
|
* benefit from a per-thread bdev_io cache. Without
|
|
|
|
* this, non-DPDK threads fetching from the mempool
|
|
|
|
* incur a cmpxchg on get and put.
|
|
|
|
*/
|
2018-01-05 21:55:38 +00:00
|
|
|
bdev_io_stailq_t per_thread_cache;
|
2017-12-20 15:20:23 +00:00
|
|
|
uint32_t per_thread_cache_count;
|
2018-06-11 12:35:11 +00:00
|
|
|
uint32_t bdev_io_cache_size;
|
2018-05-04 07:58:01 +00:00
|
|
|
|
2018-06-12 15:11:31 +00:00
|
|
|
TAILQ_HEAD(, spdk_bdev_shared_resource) shared_resources;
|
|
|
|
TAILQ_HEAD(, spdk_bdev_io_wait_entry) io_wait_queue;
|
2017-05-09 22:07:56 +00:00
|
|
|
};
|
|
|
|
|
2018-04-06 21:03:42 +00:00
|
|
|
/*
|
2018-05-04 08:10:52 +00:00
|
|
|
* Per-module (or per-io_device) data. Multiple bdevs built on the same io_device
|
2018-08-24 09:09:27 +00:00
|
|
|
* will queue here their IO that awaits retry. It makes it possible to retry sending
|
2018-04-06 21:03:42 +00:00
|
|
|
* IO to one bdev after IO from other bdev completes.
|
|
|
|
*/
|
2018-05-04 08:10:52 +00:00
|
|
|
struct spdk_bdev_shared_resource {
|
2018-04-06 20:27:34 +00:00
|
|
|
/* The bdev management channel */
|
|
|
|
struct spdk_bdev_mgmt_channel *mgmt_ch;
|
|
|
|
|
2018-04-06 21:03:42 +00:00
|
|
|
/*
|
|
|
|
* Count of I/O submitted to bdev module and waiting for completion.
|
|
|
|
* Incremented before submit_request() is called on an spdk_bdev_io.
|
|
|
|
*/
|
|
|
|
uint64_t io_outstanding;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Queue of IO awaiting retry because of a previous NOMEM status returned
|
|
|
|
* on this channel.
|
|
|
|
*/
|
|
|
|
bdev_io_tailq_t nomem_io;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Threshold which io_outstanding must drop to before retrying nomem_io.
|
|
|
|
*/
|
|
|
|
uint64_t nomem_threshold;
|
|
|
|
|
2018-05-04 07:58:01 +00:00
|
|
|
/* I/O channel allocated by a bdev module */
|
2018-05-04 08:10:52 +00:00
|
|
|
struct spdk_io_channel *shared_ch;
|
2018-05-04 07:58:01 +00:00
|
|
|
|
2018-05-04 08:10:52 +00:00
|
|
|
/* Refcount of bdev channels using this resource */
|
2018-05-04 07:58:01 +00:00
|
|
|
uint32_t ref;
|
|
|
|
|
2018-05-04 08:10:52 +00:00
|
|
|
TAILQ_ENTRY(spdk_bdev_shared_resource) link;
|
2017-06-29 18:23:50 +00:00
|
|
|
};
|
|
|
|
|
2017-09-08 18:44:50 +00:00
|
|
|
/*
 * Single-bit flag values.
 * NOTE(review): presumably stored in spdk_bdev_channel.flags - confirm at the use sites.
 */
#define BDEV_CH_RESET_IN_PROGRESS	(1 << 0)
#define BDEV_CH_QOS_ENABLED		(1 << 1)
|
2017-09-08 18:44:50 +00:00
|
|
|
|
2017-04-04 21:01:54 +00:00
|
|
|
struct spdk_bdev_channel {
|
|
|
|
struct spdk_bdev *bdev;
|
|
|
|
|
|
|
|
/* The channel for the underlying device */
|
|
|
|
struct spdk_io_channel *channel;
|
2017-05-09 22:07:56 +00:00
|
|
|
|
2018-05-04 08:10:52 +00:00
|
|
|
/* Per io_device per thread data */
|
|
|
|
struct spdk_bdev_shared_resource *shared_resource;
|
2017-04-06 21:40:29 +00:00
|
|
|
|
|
|
|
struct spdk_bdev_io_stat stat;
|
2017-05-30 08:45:46 +00:00
|
|
|
|
2018-03-16 12:20:55 +00:00
|
|
|
/*
|
|
|
|
* Count of I/O submitted through this channel and waiting for completion.
|
|
|
|
* Incremented before submit_request() is called on an spdk_bdev_io.
|
|
|
|
*/
|
|
|
|
uint64_t io_outstanding;
|
|
|
|
|
2018-01-14 11:48:01 +00:00
|
|
|
bdev_io_tailq_t queued_resets;
|
|
|
|
|
|
|
|
uint32_t flags;
|
|
|
|
|
2018-11-19 13:31:19 +00:00
|
|
|
struct spdk_histogram_data *histogram;
|
|
|
|
|
2018-01-14 11:48:01 +00:00
|
|
|
#ifdef SPDK_CONFIG_VTUNE
|
|
|
|
uint64_t start_tsc;
|
|
|
|
uint64_t interval_tsc;
|
|
|
|
__itt_string_handle *handle;
|
2018-05-23 18:30:01 +00:00
|
|
|
struct spdk_bdev_io_stat prev_stat;
|
2018-01-14 11:48:01 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
};
|
|
|
|
|
2018-04-06 21:03:42 +00:00
|
|
|
struct spdk_bdev_desc {
|
|
|
|
struct spdk_bdev *bdev;
|
2018-09-05 23:14:47 +00:00
|
|
|
struct spdk_thread *thread;
|
2018-04-06 21:03:42 +00:00
|
|
|
spdk_bdev_remove_cb_t remove_cb;
|
|
|
|
void *remove_ctx;
|
2018-06-06 13:46:13 +00:00
|
|
|
bool remove_scheduled;
|
2018-08-24 07:46:11 +00:00
|
|
|
bool closed;
|
2018-04-06 21:03:42 +00:00
|
|
|
bool write;
|
|
|
|
TAILQ_ENTRY(spdk_bdev_desc) link;
|
|
|
|
};
|
|
|
|
|
2017-12-28 09:03:17 +00:00
|
|
|
struct spdk_bdev_iostat_ctx {
|
|
|
|
struct spdk_bdev_io_stat *stat;
|
|
|
|
spdk_bdev_get_device_stat_cb cb;
|
|
|
|
void *cb_arg;
|
|
|
|
};
|
|
|
|
|
2019-02-01 19:21:03 +00:00
|
|
|
/*
 * Bundle of a bdev with a status callback and its argument.
 * NOTE(review): presumably the context of a QoS limit update - confirm at the use site.
 */
struct set_qos_limit_ctx {
	/* Callback taking cb_arg and an integer status. */
	void			(*cb_fn)(void *cb_arg, int status);
	void			*cb_arg;
	struct spdk_bdev	*bdev;
};
|
|
|
|
|
2018-02-19 22:21:15 +00:00
|
|
|
/*
 * Convert between a bdev pointer and its io_device handle: the handle is the
 * bdev address plus one byte, and the reverse macro subtracts that byte again.
 * NOTE(review): presumably the offset keeps this handle distinct from the bdev
 * address itself - confirm at the registration site.
 *
 * The argument is parenthesized so that an expression argument (for example
 * "p + 1") is cast as a whole rather than only its first operand.
 */
#define __bdev_to_io_dev(bdev)		(((char *)(bdev)) + 1)
#define __bdev_from_io_dev(io_dev)	((struct spdk_bdev *)(((char *)(io_dev)) - 1))
|
|
|
|
|
2018-08-23 22:08:17 +00:00
|
|
|
static void _spdk_bdev_write_zero_buffer_done(struct spdk_bdev_io *bdev_io, bool success,
|
|
|
|
void *cb_arg);
|
|
|
|
static void _spdk_bdev_write_zero_buffer_next(void *_bdev_io);
|
2017-07-28 22:34:24 +00:00
|
|
|
|
2019-02-01 19:21:03 +00:00
|
|
|
static void _spdk_bdev_enable_qos_msg(struct spdk_io_channel_iter *i);
|
|
|
|
static void _spdk_bdev_enable_qos_done(struct spdk_io_channel_iter *i, int status);
|
|
|
|
|
2019-04-16 08:12:09 +00:00
|
|
|
static int
|
|
|
|
_spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks,
|
|
|
|
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg);
|
|
|
|
static int
|
|
|
|
_spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
struct iovec *iov, int iovcnt, void *md_buf,
|
|
|
|
uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg);
|
|
|
|
|
2018-06-11 12:35:11 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_get_opts(struct spdk_bdev_opts *opts)
|
|
|
|
{
|
|
|
|
*opts = g_bdev_opts;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
spdk_bdev_set_opts(struct spdk_bdev_opts *opts)
|
|
|
|
{
|
2018-06-13 08:35:15 +00:00
|
|
|
uint32_t min_pool_size;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Add 1 to the thread count to account for the extra mgmt_ch that gets created during subsystem
|
|
|
|
* initialization. A second mgmt_ch will be created on the same thread when the application starts
|
|
|
|
* but before the deferred put_io_channel event is executed for the first mgmt_ch.
|
|
|
|
*/
|
|
|
|
min_pool_size = opts->bdev_io_cache_size * (spdk_thread_get_count() + 1);
|
|
|
|
if (opts->bdev_io_pool_size < min_pool_size) {
|
2018-06-11 12:35:11 +00:00
|
|
|
SPDK_ERRLOG("bdev_io_pool_size %" PRIu32 " is not compatible with bdev_io_cache_size %" PRIu32
|
|
|
|
" and %" PRIu32 " threads\n", opts->bdev_io_pool_size, opts->bdev_io_cache_size,
|
|
|
|
spdk_thread_get_count());
|
2018-06-13 08:35:15 +00:00
|
|
|
SPDK_ERRLOG("bdev_io_pool_size must be at least %" PRIu32 "\n", min_pool_size);
|
2018-06-11 12:35:11 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
g_bdev_opts = *opts;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-05-09 21:01:12 +00:00
|
|
|
struct spdk_bdev *
|
|
|
|
spdk_bdev_first(void)
|
2016-08-01 21:31:02 +00:00
|
|
|
{
|
2016-08-02 17:07:34 +00:00
|
|
|
struct spdk_bdev *bdev;
|
|
|
|
|
2017-05-09 21:09:28 +00:00
|
|
|
bdev = TAILQ_FIRST(&g_bdev_mgr.bdevs);
|
2016-08-02 17:07:34 +00:00
|
|
|
if (bdev) {
|
2017-08-30 18:06:33 +00:00
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Starting bdev iteration at %s\n", bdev->name);
|
2016-08-02 17:07:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return bdev;
|
2016-08-01 21:31:02 +00:00
|
|
|
}
|
|
|
|
|
2017-05-09 21:01:12 +00:00
|
|
|
struct spdk_bdev *
|
|
|
|
spdk_bdev_next(struct spdk_bdev *prev)
|
2016-08-01 21:31:02 +00:00
|
|
|
{
|
2016-08-02 17:07:34 +00:00
|
|
|
struct spdk_bdev *bdev;
|
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
bdev = TAILQ_NEXT(prev, internal.link);
|
2016-08-02 17:07:34 +00:00
|
|
|
if (bdev) {
|
2017-08-30 18:06:33 +00:00
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Continuing bdev iteration at %s\n", bdev->name);
|
2016-08-02 17:07:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return bdev;
|
2016-08-01 21:31:02 +00:00
|
|
|
}
|
|
|
|
|
2017-06-29 20:16:26 +00:00
|
|
|
static struct spdk_bdev *
|
|
|
|
_bdev_next_leaf(struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
while (bdev != NULL) {
|
2018-06-21 20:03:02 +00:00
|
|
|
if (bdev->internal.claim_module == NULL) {
|
2017-06-29 20:16:26 +00:00
|
|
|
return bdev;
|
|
|
|
} else {
|
2018-06-21 20:03:02 +00:00
|
|
|
bdev = TAILQ_NEXT(bdev, internal.link);
|
2017-06-29 20:16:26 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return bdev;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct spdk_bdev *
|
|
|
|
spdk_bdev_first_leaf(void)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev;
|
|
|
|
|
|
|
|
bdev = _bdev_next_leaf(TAILQ_FIRST(&g_bdev_mgr.bdevs));
|
|
|
|
|
|
|
|
if (bdev) {
|
2017-08-30 18:06:33 +00:00
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Starting bdev iteration at %s\n", bdev->name);
|
2017-06-29 20:16:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return bdev;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct spdk_bdev *
|
|
|
|
spdk_bdev_next_leaf(struct spdk_bdev *prev)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev;
|
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
bdev = _bdev_next_leaf(TAILQ_NEXT(prev, internal.link));
|
2017-06-29 20:16:26 +00:00
|
|
|
|
|
|
|
if (bdev) {
|
2017-08-30 18:06:33 +00:00
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Continuing bdev iteration at %s\n", bdev->name);
|
2017-06-29 20:16:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return bdev;
|
|
|
|
}
|
|
|
|
|
2017-05-09 21:01:12 +00:00
|
|
|
struct spdk_bdev *
|
|
|
|
spdk_bdev_get_by_name(const char *bdev_name)
|
2016-08-01 21:31:02 +00:00
|
|
|
{
|
2017-11-29 15:13:17 +00:00
|
|
|
struct spdk_bdev_alias *tmp;
|
2016-08-01 21:31:02 +00:00
|
|
|
struct spdk_bdev *bdev = spdk_bdev_first();
|
|
|
|
|
|
|
|
while (bdev != NULL) {
|
2017-06-02 17:25:43 +00:00
|
|
|
if (strcmp(bdev_name, bdev->name) == 0) {
|
2016-08-02 17:07:34 +00:00
|
|
|
return bdev;
|
2016-08-01 21:31:02 +00:00
|
|
|
}
|
2017-11-29 15:13:17 +00:00
|
|
|
|
|
|
|
TAILQ_FOREACH(tmp, &bdev->aliases, tailq) {
|
|
|
|
if (strcmp(bdev_name, tmp->alias) == 0) {
|
|
|
|
return bdev;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-08-01 21:31:02 +00:00
|
|
|
bdev = spdk_bdev_next(bdev);
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2018-07-11 20:48:53 +00:00
|
|
|
void
|
2018-06-25 18:41:17 +00:00
|
|
|
spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2018-07-11 20:48:53 +00:00
|
|
|
struct iovec *iovs;
|
2018-06-25 18:03:11 +00:00
|
|
|
|
2017-11-07 22:05:19 +00:00
|
|
|
if (bdev_io->u.bdev.iovs == NULL) {
|
|
|
|
bdev_io->u.bdev.iovs = &bdev_io->iov;
|
|
|
|
bdev_io->u.bdev.iovcnt = 1;
|
|
|
|
}
|
|
|
|
|
2018-07-11 20:48:53 +00:00
|
|
|
iovs = bdev_io->u.bdev.iovs;
|
2018-06-25 18:41:17 +00:00
|
|
|
|
2018-07-11 20:48:53 +00:00
|
|
|
assert(iovs != NULL);
|
|
|
|
assert(bdev_io->u.bdev.iovcnt >= 1);
|
2016-10-04 14:39:27 +00:00
|
|
|
|
2018-07-11 20:48:53 +00:00
|
|
|
iovs[0].iov_base = buf;
|
|
|
|
iovs[0].iov_len = len;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2019-04-16 08:12:09 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_io_set_md_buf(struct spdk_bdev_io *bdev_io, void *md_buf, size_t len)
|
|
|
|
{
|
|
|
|
assert((len / spdk_bdev_get_md_size(bdev_io->bdev)) >= bdev_io->u.bdev.num_blocks);
|
|
|
|
bdev_io->u.bdev.md_buf = md_buf;
|
|
|
|
}
|
|
|
|
|
2018-10-12 07:46:14 +00:00
|
|
|
/*
 * Tell whether an iovec array already carries a data buffer.
 *
 * iovs - iovec array to inspect; may be NULL.
 *
 * Returns true only when iovs is not NULL and its first element has a non-NULL
 * iov_base. Only the first element is inspected.
 */
static bool
_is_buf_allocated(const struct iovec *iovs)
{
	return iovs != NULL && iovs[0].iov_base != NULL;
}
|
|
|
|
|
|
|
|
/*
 * Check that every buffer of an iovec array starts on an aligned address.
 *
 * iovs      - iovec array to inspect.
 * iovcnt    - number of elements in iovs.
 * alignment - required alignment in bytes. The mask test is only meaningful
 *             for powers of two.
 *
 * Returns true when alignment is 1 (nothing to check, the expected common
 * case) or when every iov_base is a multiple of alignment; false as soon as a
 * misaligned buffer is found.
 */
static bool
_are_iovs_aligned(struct iovec *iovs, int iovcnt, uint32_t alignment)
{
	uintptr_t mask;
	int idx;

	if (spdk_likely(alignment == 1)) {
		return true;
	}

	mask = alignment - 1;

	for (idx = 0; idx < iovcnt; idx++) {
		if (((uintptr_t)iovs[idx].iov_base & mask) != 0) {
			return false;
		}
	}

	return true;
}
|
|
|
|
|
|
|
|
/*
 * Gather the contents of an iovec array into one contiguous buffer.
 *
 * buf     - destination buffer.
 * buf_len - capacity of buf in bytes; copying stops once it is exhausted.
 * iovs    - source iovec array.
 * iovcnt  - number of elements in iovs.
 *
 * Segments are copied in order and packed back to back. A segment that does
 * not fit entirely is truncated to the remaining capacity.
 */
static void
_copy_iovs_to_buf(void *buf, size_t buf_len, struct iovec *iovs, int iovcnt)
{
	/* Walk the destination with a byte pointer: arithmetic on void * is not standard C. */
	uint8_t *dst = buf;
	size_t len;
	int i;

	for (i = 0; i < iovcnt && buf_len > 0; i++) {
		len = iovs[i].iov_len;
		if (len > buf_len) {
			len = buf_len;
		}

		/* Skip empty segments so memcpy() never sees a possibly NULL iov_base. */
		if (len == 0) {
			continue;
		}

		memcpy(dst, iovs[i].iov_base, len);
		dst += len;
		buf_len -= len;
	}
}
|
|
|
|
|
|
|
|
/*
 * Scatter one contiguous buffer across an iovec array.
 *
 * iovs    - destination iovec array.
 * iovcnt  - number of elements in iovs.
 * buf     - source buffer.
 * buf_len - number of bytes available in buf; copying stops once all of them
 *           have been consumed.
 *
 * Segments are filled in order. A segment larger than the remaining data is
 * filled only partially and the segments after it are left untouched.
 */
static void
_copy_buf_to_iovs(struct iovec *iovs, int iovcnt, void *buf, size_t buf_len)
{
	/* Walk the source with a byte pointer: arithmetic on void * is not standard C. */
	const uint8_t *src = buf;
	size_t len;
	int i;

	for (i = 0; i < iovcnt && buf_len > 0; i++) {
		len = iovs[i].iov_len;
		if (len > buf_len) {
			len = buf_len;
		}

		/* Skip empty segments so memcpy() never sees a possibly NULL iov_base. */
		if (len == 0) {
			continue;
		}

		memcpy(iovs[i].iov_base, src, len);
		src += len;
		buf_len -= len;
	}
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_bdev_io_set_bounce_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len)
|
|
|
|
{
|
|
|
|
/* save original iovec */
|
|
|
|
bdev_io->internal.orig_iovs = bdev_io->u.bdev.iovs;
|
|
|
|
bdev_io->internal.orig_iovcnt = bdev_io->u.bdev.iovcnt;
|
|
|
|
/* set bounce iov */
|
|
|
|
bdev_io->u.bdev.iovs = &bdev_io->internal.bounce_iov;
|
|
|
|
bdev_io->u.bdev.iovcnt = 1;
|
|
|
|
/* set bounce buffer for this operation */
|
|
|
|
bdev_io->u.bdev.iovs[0].iov_base = buf;
|
|
|
|
bdev_io->u.bdev.iovs[0].iov_len = len;
|
|
|
|
/* if this is write path, copy data from original buffer to bounce buffer */
|
|
|
|
if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
|
|
|
|
_copy_iovs_to_buf(buf, len, bdev_io->internal.orig_iovs, bdev_io->internal.orig_iovcnt);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-04-16 08:12:09 +00:00
|
|
|
static void
|
|
|
|
_bdev_io_set_bounce_md_buf(struct spdk_bdev_io *bdev_io, void *md_buf, size_t len)
|
|
|
|
{
|
|
|
|
/* save original md_buf */
|
|
|
|
bdev_io->internal.orig_md_buf = bdev_io->u.bdev.md_buf;
|
|
|
|
/* set bounce md_buf */
|
|
|
|
bdev_io->u.bdev.md_buf = md_buf;
|
|
|
|
|
|
|
|
if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
|
|
|
|
memcpy(md_buf, bdev_io->internal.orig_md_buf, len);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-04-15 15:07:18 +00:00
|
|
|
static void
|
|
|
|
_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, uint64_t len)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev = bdev_io->bdev;
|
|
|
|
bool buf_allocated;
|
2019-04-16 08:12:09 +00:00
|
|
|
uint64_t md_len, alignment;
|
2019-04-15 15:07:18 +00:00
|
|
|
void *aligned_buf;
|
|
|
|
|
|
|
|
alignment = spdk_bdev_get_buf_align(bdev);
|
|
|
|
buf_allocated = _is_buf_allocated(bdev_io->u.bdev.iovs);
|
|
|
|
aligned_buf = (void *)(((uintptr_t)buf + (alignment - 1)) & ~(alignment - 1));
|
|
|
|
|
|
|
|
if (buf_allocated) {
|
|
|
|
_bdev_io_set_bounce_buf(bdev_io, aligned_buf, len);
|
|
|
|
} else {
|
|
|
|
spdk_bdev_io_set_buf(bdev_io, aligned_buf, len);
|
|
|
|
}
|
|
|
|
|
2019-04-16 08:12:09 +00:00
|
|
|
if (spdk_bdev_is_md_separate(bdev)) {
|
|
|
|
aligned_buf = (char *)aligned_buf + len;
|
|
|
|
md_len = bdev_io->u.bdev.num_blocks * bdev->md_len;
|
|
|
|
|
|
|
|
assert(((uintptr_t)aligned_buf & (alignment - 1)) == 0);
|
|
|
|
|
|
|
|
if (bdev_io->u.bdev.md_buf != NULL) {
|
|
|
|
_bdev_io_set_bounce_md_buf(bdev_io, aligned_buf, md_len);
|
|
|
|
} else {
|
|
|
|
spdk_bdev_io_set_md_buf(bdev_io, aligned_buf, md_len);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-04-15 15:07:18 +00:00
|
|
|
bdev_io->internal.buf = buf;
|
2019-05-31 07:19:49 +00:00
|
|
|
bdev_io->internal.get_buf_cb(spdk_bdev_io_get_io_channel(bdev_io), bdev_io, true);
|
2019-04-15 15:07:18 +00:00
|
|
|
}
|
|
|
|
|
2016-07-20 18:16:23 +00:00
|
|
|
static void
|
2017-05-05 20:15:51 +00:00
|
|
|
spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2019-04-16 08:12:09 +00:00
|
|
|
struct spdk_bdev *bdev = bdev_io->bdev;
|
2017-04-11 04:36:05 +00:00
|
|
|
struct spdk_mempool *pool;
|
2016-09-19 06:06:40 +00:00
|
|
|
struct spdk_bdev_io *tmp;
|
2018-01-05 21:55:38 +00:00
|
|
|
bdev_io_stailq_t *stailq;
|
2017-05-10 21:42:45 +00:00
|
|
|
struct spdk_bdev_mgmt_channel *ch;
|
2019-04-16 08:12:09 +00:00
|
|
|
uint64_t buf_len, md_len, alignment;
|
2019-04-15 15:07:18 +00:00
|
|
|
void *buf;
|
2016-10-04 14:39:27 +00:00
|
|
|
|
2018-06-19 22:08:31 +00:00
|
|
|
buf = bdev_io->internal.buf;
|
2018-10-11 07:19:34 +00:00
|
|
|
buf_len = bdev_io->internal.buf_len;
|
2019-04-16 08:12:09 +00:00
|
|
|
md_len = spdk_bdev_is_md_separate(bdev) ? bdev_io->u.bdev.num_blocks * bdev->md_len : 0;
|
|
|
|
alignment = spdk_bdev_get_buf_align(bdev);
|
2018-06-20 17:54:48 +00:00
|
|
|
ch = bdev_io->internal.ch->shared_resource->mgmt_ch;
|
2017-05-10 21:42:45 +00:00
|
|
|
|
2018-07-11 20:48:53 +00:00
|
|
|
bdev_io->internal.buf = NULL;
|
|
|
|
|
2019-04-16 08:12:09 +00:00
|
|
|
if (buf_len + alignment + md_len <= SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_SMALL_BUF_MAX_SIZE) +
|
2019-03-05 01:10:53 +00:00
|
|
|
SPDK_BDEV_POOL_ALIGNMENT) {
|
2017-05-09 21:09:28 +00:00
|
|
|
pool = g_bdev_mgr.buf_small_pool;
|
2018-01-05 21:55:38 +00:00
|
|
|
stailq = &ch->need_buf_small;
|
2016-07-20 18:16:23 +00:00
|
|
|
} else {
|
2017-05-09 21:09:28 +00:00
|
|
|
pool = g_bdev_mgr.buf_large_pool;
|
2018-01-05 21:55:38 +00:00
|
|
|
stailq = &ch->need_buf_large;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2018-01-05 21:55:38 +00:00
|
|
|
if (STAILQ_EMPTY(stailq)) {
|
2017-04-11 04:36:05 +00:00
|
|
|
spdk_mempool_put(pool, buf);
|
2016-07-20 18:16:23 +00:00
|
|
|
} else {
|
2018-01-05 21:55:38 +00:00
|
|
|
tmp = STAILQ_FIRST(stailq);
|
2018-05-31 17:06:30 +00:00
|
|
|
STAILQ_REMOVE_HEAD(stailq, internal.buf_link);
|
2019-04-15 15:07:18 +00:00
|
|
|
_bdev_io_set_buf(tmp, buf, tmp->internal.buf_len);
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-10-12 07:46:14 +00:00
|
|
|
static void
|
|
|
|
_bdev_io_unset_bounce_buf(struct spdk_bdev_io *bdev_io)
|
|
|
|
{
|
2019-04-16 08:12:09 +00:00
|
|
|
if (spdk_likely(bdev_io->internal.orig_iovcnt == 0)) {
|
|
|
|
assert(bdev_io->internal.orig_md_buf == NULL);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2018-10-12 07:46:14 +00:00
|
|
|
/* if this is read path, copy data from bounce buffer to original buffer */
|
|
|
|
if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ &&
|
|
|
|
bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS) {
|
2019-04-16 08:12:09 +00:00
|
|
|
_copy_buf_to_iovs(bdev_io->internal.orig_iovs,
|
|
|
|
bdev_io->internal.orig_iovcnt,
|
|
|
|
bdev_io->internal.bounce_iov.iov_base,
|
|
|
|
bdev_io->internal.bounce_iov.iov_len);
|
2018-10-12 07:46:14 +00:00
|
|
|
}
|
|
|
|
/* set orignal buffer for this io */
|
|
|
|
bdev_io->u.bdev.iovcnt = bdev_io->internal.orig_iovcnt;
|
|
|
|
bdev_io->u.bdev.iovs = bdev_io->internal.orig_iovs;
|
|
|
|
/* disable bouncing buffer for this io */
|
|
|
|
bdev_io->internal.orig_iovcnt = 0;
|
|
|
|
bdev_io->internal.orig_iovs = NULL;
|
2019-04-16 08:12:09 +00:00
|
|
|
|
|
|
|
/* do the same for metadata buffer */
|
|
|
|
if (spdk_unlikely(bdev_io->internal.orig_md_buf != NULL)) {
|
|
|
|
assert(spdk_bdev_is_md_separate(bdev_io->bdev));
|
|
|
|
|
|
|
|
if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ &&
|
|
|
|
bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS) {
|
|
|
|
memcpy(bdev_io->internal.orig_md_buf, bdev_io->u.bdev.md_buf,
|
|
|
|
bdev_io->u.bdev.num_blocks * spdk_bdev_get_md_size(bdev_io->bdev));
|
|
|
|
}
|
|
|
|
|
|
|
|
bdev_io->u.bdev.md_buf = bdev_io->internal.orig_md_buf;
|
|
|
|
bdev_io->internal.orig_md_buf = NULL;
|
|
|
|
}
|
|
|
|
|
2018-10-12 07:46:14 +00:00
|
|
|
spdk_bdev_io_put_buf(bdev_io);
|
|
|
|
}
|
|
|
|
|
2017-05-09 21:32:49 +00:00
|
|
|
void
|
2017-09-22 20:59:55 +00:00
|
|
|
spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, uint64_t len)
|
2017-05-09 21:32:49 +00:00
|
|
|
{
|
2019-04-16 08:12:09 +00:00
|
|
|
struct spdk_bdev *bdev = bdev_io->bdev;
|
2017-05-09 21:32:49 +00:00
|
|
|
struct spdk_mempool *pool;
|
2018-01-05 21:55:38 +00:00
|
|
|
bdev_io_stailq_t *stailq;
|
2018-04-06 20:27:34 +00:00
|
|
|
struct spdk_bdev_mgmt_channel *mgmt_ch;
|
2019-04-16 08:12:09 +00:00
|
|
|
uint64_t alignment, md_len;
|
2019-04-15 15:07:18 +00:00
|
|
|
void *buf;
|
2017-05-09 21:32:49 +00:00
|
|
|
|
|
|
|
assert(cb != NULL);
|
2019-04-16 08:12:09 +00:00
|
|
|
|
|
|
|
alignment = spdk_bdev_get_buf_align(bdev);
|
|
|
|
md_len = spdk_bdev_is_md_separate(bdev) ? bdev_io->u.bdev.num_blocks * bdev->md_len : 0;
|
2018-10-11 07:19:34 +00:00
|
|
|
|
2019-04-15 15:07:18 +00:00
|
|
|
if (_is_buf_allocated(bdev_io->u.bdev.iovs) &&
|
2018-10-12 07:46:14 +00:00
|
|
|
_are_iovs_aligned(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, alignment)) {
|
|
|
|
/* Buffer already present and aligned */
|
2019-05-31 07:19:49 +00:00
|
|
|
cb(spdk_bdev_io_get_io_channel(bdev_io), bdev_io, true);
|
bdev: Not assert but pass completion status to spdk_bdev_io_get_buf_cb
When the specified buffer size to spdk_bdev_io_get_buf() is greater
than the permitted maximum, spdk_bdev_io_get_buf() asserts simply and
doesn't call the specified callback function.
SPDK SCSI library doesn't allocate read buffer and specifies
expected read buffer size, and expects that it is allocated by
spdk_bdev_io_get_buf().
Bdev perf tool also doesn't allocate read buffer and specifies
expected read buffer size, and expects that it is allocated by
spdk_bdev_io_get_buf().
When we support DIF insert and strip in iSCSI target, the read
buffer size iSCSI initiator requests and the read buffer size iSCSI target
requests will become different.
Even after that, iSCSI initiator and iSCSI target will negotiate correctly
not to cause buffer overflow in spdk_bdev_io_get_buf(), but if iSCSI
initiator ignores the result of negotiation, iSCSI initiator can request
read buffer size larger than the permitted maximum, and can cause
failure in iSCSI target. This is very flagile and should be avoided.
This patch do the following
- Add the completion status of spdk_bdev_io_get_buf() to
spdk_bdev_io_get_buf_cb(),
- spdk_bdev_io_get_buf() calls spdk_bdev_io_get_buf_cb() by setting
success to false, and return.
- spdk_bdev_io_get_buf_cb() in each bdev module calls assert if success
is false.
Subsequent patches will process the case that success is false
in spdk_bdev_io_get_buf_cb().
Change-Id: I76429a86e18a69aa085a353ac94743296d270b82
Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-on: https://review.gerrithub.io/c/446045
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Ziye Yang <ziye.yang@intel.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
2019-02-25 00:34:28 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2019-04-16 08:12:09 +00:00
|
|
|
if (len + alignment + md_len > SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_LARGE_BUF_MAX_SIZE) +
|
2019-03-05 01:10:53 +00:00
|
|
|
SPDK_BDEV_POOL_ALIGNMENT) {
|
bdev: Not assert but pass completion status to spdk_bdev_io_get_buf_cb
When the specified buffer size to spdk_bdev_io_get_buf() is greater
than the permitted maximum, spdk_bdev_io_get_buf() asserts simply and
doesn't call the specified callback function.
SPDK SCSI library doesn't allocate read buffer and specifies
expected read buffer size, and expects that it is allocated by
spdk_bdev_io_get_buf().
Bdev perf tool also doesn't allocate read buffer and specifies
expected read buffer size, and expects that it is allocated by
spdk_bdev_io_get_buf().
When we support DIF insert and strip in iSCSI target, the read
buffer size iSCSI initiator requests and the read buffer size iSCSI target
requests will become different.
Even after that, iSCSI initiator and iSCSI target will negotiate correctly
not to cause buffer overflow in spdk_bdev_io_get_buf(), but if iSCSI
initiator ignores the result of negotiation, iSCSI initiator can request
read buffer size larger than the permitted maximum, and can cause
failure in iSCSI target. This is very flagile and should be avoided.
This patch do the following
- Add the completion status of spdk_bdev_io_get_buf() to
spdk_bdev_io_get_buf_cb(),
- spdk_bdev_io_get_buf() calls spdk_bdev_io_get_buf_cb() by setting
success to false, and return.
- spdk_bdev_io_get_buf_cb() in each bdev module calls assert if success
is false.
Subsequent patches will process the case that success is false
in spdk_bdev_io_get_buf_cb().
Change-Id: I76429a86e18a69aa085a353ac94743296d270b82
Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-on: https://review.gerrithub.io/c/446045
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Ziye Yang <ziye.yang@intel.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
2019-02-25 00:34:28 +00:00
|
|
|
SPDK_ERRLOG("Length + alignment %" PRIu64 " is larger than allowed\n",
|
|
|
|
len + alignment);
|
2019-05-31 07:19:49 +00:00
|
|
|
cb(spdk_bdev_io_get_io_channel(bdev_io), bdev_io, false);
|
2017-05-09 21:32:49 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2018-06-20 17:54:48 +00:00
|
|
|
mgmt_ch = bdev_io->internal.ch->shared_resource->mgmt_ch;
|
2017-05-10 21:42:45 +00:00
|
|
|
|
2018-06-19 22:08:31 +00:00
|
|
|
bdev_io->internal.buf_len = len;
|
|
|
|
bdev_io->internal.get_buf_cb = cb;
|
2018-10-11 07:19:34 +00:00
|
|
|
|
2019-04-16 08:12:09 +00:00
|
|
|
if (len + alignment + md_len <= SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_SMALL_BUF_MAX_SIZE) +
|
2019-03-05 01:10:53 +00:00
|
|
|
SPDK_BDEV_POOL_ALIGNMENT) {
|
2017-05-09 21:32:49 +00:00
|
|
|
pool = g_bdev_mgr.buf_small_pool;
|
2018-04-06 20:27:34 +00:00
|
|
|
stailq = &mgmt_ch->need_buf_small;
|
2017-05-09 21:32:49 +00:00
|
|
|
} else {
|
|
|
|
pool = g_bdev_mgr.buf_large_pool;
|
2018-04-06 20:27:34 +00:00
|
|
|
stailq = &mgmt_ch->need_buf_large;
|
2017-05-09 21:32:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
buf = spdk_mempool_get(pool);
|
|
|
|
if (!buf) {
|
2018-05-31 17:06:30 +00:00
|
|
|
STAILQ_INSERT_TAIL(stailq, bdev_io, internal.buf_link);
|
2017-05-09 21:32:49 +00:00
|
|
|
} else {
|
2019-04-15 15:07:18 +00:00
|
|
|
_bdev_io_set_buf(bdev_io, buf, len);
|
2017-05-09 21:32:49 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-07-20 18:16:23 +00:00
|
|
|
static int
|
|
|
|
spdk_bdev_module_get_max_ctx_size(void)
|
|
|
|
{
|
2018-03-09 22:20:21 +00:00
|
|
|
struct spdk_bdev_module *bdev_module;
|
2016-07-20 18:16:23 +00:00
|
|
|
int max_bdev_module_size = 0;
|
|
|
|
|
2018-06-21 20:32:40 +00:00
|
|
|
TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, internal.tailq) {
|
2016-07-20 18:16:23 +00:00
|
|
|
if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) {
|
|
|
|
max_bdev_module_size = bdev_module->get_ctx_size();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return max_bdev_module_size;
|
|
|
|
}
|
|
|
|
|
2017-06-06 20:26:04 +00:00
|
|
|
void
|
2016-07-20 18:16:23 +00:00
|
|
|
spdk_bdev_config_text(FILE *fp)
|
|
|
|
{
|
2018-03-09 22:20:21 +00:00
|
|
|
struct spdk_bdev_module *bdev_module;
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2018-06-21 20:32:40 +00:00
|
|
|
TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, internal.tailq) {
|
2016-07-20 18:16:23 +00:00
|
|
|
if (bdev_module->config_text) {
|
|
|
|
bdev_module->config_text(fp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-06-22 02:15:02 +00:00
|
|
|
static void
|
|
|
|
spdk_bdev_qos_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct spdk_bdev_qos *qos = bdev->internal.qos;
|
|
|
|
uint64_t limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES];
|
|
|
|
|
|
|
|
if (!qos) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
spdk_bdev_get_qos_rate_limits(bdev, limits);
|
|
|
|
|
|
|
|
spdk_json_write_object_begin(w);
|
|
|
|
spdk_json_write_named_string(w, "method", "set_bdev_qos_limit");
|
|
|
|
|
2019-02-01 05:34:45 +00:00
|
|
|
spdk_json_write_named_object_begin(w, "params");
|
2018-06-22 02:15:02 +00:00
|
|
|
spdk_json_write_named_string(w, "name", bdev->name);
|
|
|
|
for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
|
|
|
|
if (limits[i] > 0) {
|
|
|
|
spdk_json_write_named_uint64(w, qos_rpc_type[i], limits[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
spdk_json_write_object_end(w);
|
|
|
|
|
|
|
|
spdk_json_write_object_end(w);
|
|
|
|
}
|
|
|
|
|
2018-03-23 13:55:07 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_subsystem_config_json(struct spdk_json_write_ctx *w)
|
2018-02-22 12:48:13 +00:00
|
|
|
{
|
|
|
|
struct spdk_bdev_module *bdev_module;
|
|
|
|
struct spdk_bdev *bdev;
|
|
|
|
|
2018-03-23 13:55:07 +00:00
|
|
|
assert(w != NULL);
|
2018-02-22 12:48:13 +00:00
|
|
|
|
|
|
|
spdk_json_write_array_begin(w);
|
|
|
|
|
2018-06-11 15:58:15 +00:00
|
|
|
spdk_json_write_object_begin(w);
|
|
|
|
spdk_json_write_named_string(w, "method", "set_bdev_options");
|
2019-02-01 05:34:45 +00:00
|
|
|
spdk_json_write_named_object_begin(w, "params");
|
2018-06-11 15:58:15 +00:00
|
|
|
spdk_json_write_named_uint32(w, "bdev_io_pool_size", g_bdev_opts.bdev_io_pool_size);
|
|
|
|
spdk_json_write_named_uint32(w, "bdev_io_cache_size", g_bdev_opts.bdev_io_cache_size);
|
|
|
|
spdk_json_write_object_end(w);
|
|
|
|
spdk_json_write_object_end(w);
|
|
|
|
|
2018-06-21 20:32:40 +00:00
|
|
|
TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, internal.tailq) {
|
2018-02-22 12:48:13 +00:00
|
|
|
if (bdev_module->config_json) {
|
|
|
|
bdev_module->config_json(w);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-06-18 12:18:32 +00:00
|
|
|
pthread_mutex_lock(&g_bdev_mgr.mutex);
|
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
TAILQ_FOREACH(bdev, &g_bdev_mgr.bdevs, internal.link) {
|
2018-09-04 17:19:56 +00:00
|
|
|
if (bdev->fn_table->write_config_json) {
|
|
|
|
bdev->fn_table->write_config_json(bdev, w);
|
|
|
|
}
|
2019-04-17 21:34:14 +00:00
|
|
|
|
|
|
|
spdk_bdev_qos_config_json(bdev, w);
|
2018-02-22 12:48:13 +00:00
|
|
|
}
|
|
|
|
|
2019-06-18 12:18:32 +00:00
|
|
|
pthread_mutex_unlock(&g_bdev_mgr.mutex);
|
|
|
|
|
2018-02-22 12:48:13 +00:00
|
|
|
spdk_json_write_array_end(w);
|
|
|
|
}
|
|
|
|
|
2017-05-09 22:07:56 +00:00
|
|
|
static int
|
|
|
|
spdk_bdev_mgmt_channel_create(void *io_device, void *ctx_buf)
|
|
|
|
{
|
2017-05-10 21:42:45 +00:00
|
|
|
struct spdk_bdev_mgmt_channel *ch = ctx_buf;
|
2018-06-13 08:35:15 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
|
|
|
uint32_t i;
|
2017-05-10 21:42:45 +00:00
|
|
|
|
2018-01-05 21:55:38 +00:00
|
|
|
STAILQ_INIT(&ch->need_buf_small);
|
|
|
|
STAILQ_INIT(&ch->need_buf_large);
|
2017-05-10 21:42:45 +00:00
|
|
|
|
2018-01-05 21:55:38 +00:00
|
|
|
STAILQ_INIT(&ch->per_thread_cache);
|
2018-06-11 12:35:11 +00:00
|
|
|
ch->bdev_io_cache_size = g_bdev_opts.bdev_io_cache_size;
|
2017-12-20 15:20:23 +00:00
|
|
|
|
2018-06-13 08:35:15 +00:00
|
|
|
/* Pre-populate bdev_io cache to ensure this thread cannot be starved. */
|
|
|
|
ch->per_thread_cache_count = 0;
|
|
|
|
for (i = 0; i < ch->bdev_io_cache_size; i++) {
|
|
|
|
bdev_io = spdk_mempool_get(g_bdev_mgr.bdev_io_pool);
|
|
|
|
assert(bdev_io != NULL);
|
|
|
|
ch->per_thread_cache_count++;
|
2019-02-04 17:28:42 +00:00
|
|
|
STAILQ_INSERT_HEAD(&ch->per_thread_cache, bdev_io, internal.buf_link);
|
2018-06-13 08:35:15 +00:00
|
|
|
}
|
|
|
|
|
2018-05-04 08:10:52 +00:00
|
|
|
TAILQ_INIT(&ch->shared_resources);
|
2018-06-12 15:11:31 +00:00
|
|
|
TAILQ_INIT(&ch->io_wait_queue);
|
2018-05-04 07:58:01 +00:00
|
|
|
|
2017-05-09 22:07:56 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-12-20 16:04:05 +00:00
|
|
|
static void
|
2018-04-06 20:27:34 +00:00
|
|
|
spdk_bdev_mgmt_channel_destroy(void *io_device, void *ctx_buf)
|
2017-12-20 16:04:05 +00:00
|
|
|
{
|
2018-04-06 20:27:34 +00:00
|
|
|
struct spdk_bdev_mgmt_channel *ch = ctx_buf;
|
2017-12-20 15:20:23 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
|
|
|
|
2018-01-05 21:55:38 +00:00
|
|
|
if (!STAILQ_EMPTY(&ch->need_buf_small) || !STAILQ_EMPTY(&ch->need_buf_large)) {
|
2018-05-04 07:58:01 +00:00
|
|
|
SPDK_ERRLOG("Pending I/O list wasn't empty on mgmt channel free\n");
|
|
|
|
}
|
|
|
|
|
2018-05-04 08:10:52 +00:00
|
|
|
if (!TAILQ_EMPTY(&ch->shared_resources)) {
|
2018-05-04 07:58:01 +00:00
|
|
|
SPDK_ERRLOG("Module channel list wasn't empty on mgmt channel free\n");
|
2017-12-20 16:04:05 +00:00
|
|
|
}
|
2017-12-20 15:20:23 +00:00
|
|
|
|
2018-01-05 21:55:38 +00:00
|
|
|
while (!STAILQ_EMPTY(&ch->per_thread_cache)) {
|
|
|
|
bdev_io = STAILQ_FIRST(&ch->per_thread_cache);
|
2018-05-31 17:06:30 +00:00
|
|
|
STAILQ_REMOVE_HEAD(&ch->per_thread_cache, internal.buf_link);
|
2017-12-20 15:20:23 +00:00
|
|
|
ch->per_thread_cache_count--;
|
|
|
|
spdk_mempool_put(g_bdev_mgr.bdev_io_pool, (void *)bdev_io);
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(ch->per_thread_cache_count == 0);
|
2017-12-20 16:04:05 +00:00
|
|
|
}
|
|
|
|
|
2017-06-14 23:37:15 +00:00
|
|
|
static void
|
|
|
|
spdk_bdev_init_complete(int rc)
|
|
|
|
{
|
2017-10-25 13:58:02 +00:00
|
|
|
spdk_bdev_init_cb cb_fn = g_init_cb_fn;
|
|
|
|
void *cb_arg = g_init_cb_arg;
|
2018-04-18 14:46:07 +00:00
|
|
|
struct spdk_bdev_module *m;
|
2017-06-14 23:37:15 +00:00
|
|
|
|
2017-07-10 23:36:35 +00:00
|
|
|
g_bdev_mgr.init_complete = true;
|
2017-10-25 13:58:02 +00:00
|
|
|
g_init_cb_fn = NULL;
|
|
|
|
g_init_cb_arg = NULL;
|
2017-06-14 23:37:15 +00:00
|
|
|
|
2018-04-18 14:46:07 +00:00
|
|
|
/*
|
|
|
|
* For modules that need to know when subsystem init is complete,
|
|
|
|
* inform them now.
|
|
|
|
*/
|
2018-07-17 00:10:57 +00:00
|
|
|
if (rc == 0) {
|
|
|
|
TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, internal.tailq) {
|
|
|
|
if (m->init_complete) {
|
|
|
|
m->init_complete();
|
|
|
|
}
|
2018-04-18 14:46:07 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-06-14 23:37:15 +00:00
|
|
|
cb_fn(cb_arg, rc);
|
|
|
|
}
|
|
|
|
|
2017-07-10 23:36:35 +00:00
|
|
|
static void
|
2017-08-25 07:30:10 +00:00
|
|
|
spdk_bdev_module_action_complete(void)
|
2017-07-10 23:36:35 +00:00
|
|
|
{
|
2018-03-09 22:20:21 +00:00
|
|
|
struct spdk_bdev_module *m;
|
2017-07-10 23:36:35 +00:00
|
|
|
|
|
|
|
/*
|
2017-08-24 15:36:25 +00:00
|
|
|
* Don't finish bdev subsystem initialization if
|
|
|
|
* module pre-initialization is still in progress, or
|
|
|
|
* the subsystem been already initialized.
|
|
|
|
*/
|
|
|
|
if (!g_bdev_mgr.module_init_complete || g_bdev_mgr.init_complete) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check all bdev modules for inits/examinations in progress. If any
|
2017-07-10 23:36:35 +00:00
|
|
|
* exist, return immediately since we cannot finish bdev subsystem
|
|
|
|
* initialization until all are completed.
|
|
|
|
*/
|
2018-06-21 20:32:40 +00:00
|
|
|
TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, internal.tailq) {
|
|
|
|
if (m->internal.action_in_progress > 0) {
|
2017-07-10 23:36:35 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-08-25 07:30:10 +00:00
|
|
|
/*
|
|
|
|
* Modules already finished initialization - now that all
|
2017-08-24 15:36:25 +00:00
|
|
|
* the bdev modules have finished their asynchronous I/O
|
2017-08-25 07:30:10 +00:00
|
|
|
* processing, the entire bdev layer can be marked as complete.
|
|
|
|
*/
|
2017-07-13 17:36:19 +00:00
|
|
|
spdk_bdev_init_complete(0);
|
2017-07-10 23:36:35 +00:00
|
|
|
}
|
|
|
|
|
2017-08-24 15:36:25 +00:00
|
|
|
static void
|
2018-03-09 22:20:21 +00:00
|
|
|
spdk_bdev_module_action_done(struct spdk_bdev_module *module)
|
2017-08-24 15:36:25 +00:00
|
|
|
{
|
2018-06-21 20:32:40 +00:00
|
|
|
assert(module->internal.action_in_progress > 0);
|
|
|
|
module->internal.action_in_progress--;
|
2017-08-24 15:36:25 +00:00
|
|
|
spdk_bdev_module_action_complete();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Signal that module has finished an initialization action.
 * Forwards to spdk_bdev_module_action_done().
 */
void
spdk_bdev_module_init_done(struct spdk_bdev_module *module)
{
	spdk_bdev_module_action_done(module);
}
|
|
|
|
|
|
|
|
/*
 * Signal that module has finished an examine action.
 * Forwards to spdk_bdev_module_action_done().
 */
void
spdk_bdev_module_examine_done(struct spdk_bdev_module *module)
{
	spdk_bdev_module_action_done(module);
}
|
|
|
|
|
2018-08-02 15:57:18 +00:00
|
|
|
/** The last initialized bdev module */
|
|
|
|
static struct spdk_bdev_module *g_resume_bdev_module = NULL;
|
|
|
|
|
2019-07-05 22:12:01 +00:00
|
|
|
static void
|
|
|
|
spdk_bdev_init_failed(void *cb_arg)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_module *module = cb_arg;
|
|
|
|
|
|
|
|
module->internal.action_in_progress--;
|
|
|
|
spdk_bdev_init_complete(-1);
|
|
|
|
}
|
|
|
|
|
2017-07-13 17:36:19 +00:00
|
|
|
static int
|
|
|
|
spdk_bdev_modules_init(void)
|
2017-05-26 04:58:04 +00:00
|
|
|
{
|
2018-03-09 22:20:21 +00:00
|
|
|
struct spdk_bdev_module *module;
|
2017-08-25 07:30:10 +00:00
|
|
|
int rc = 0;
|
2017-05-26 04:58:04 +00:00
|
|
|
|
2018-06-21 20:32:40 +00:00
|
|
|
TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, internal.tailq) {
|
2018-08-02 15:57:18 +00:00
|
|
|
g_resume_bdev_module = module;
|
2019-05-15 12:29:37 +00:00
|
|
|
if (module->async_init) {
|
|
|
|
module->internal.action_in_progress = 1;
|
|
|
|
}
|
2017-07-13 17:36:19 +00:00
|
|
|
rc = module->module_init();
|
|
|
|
if (rc != 0) {
|
2019-07-05 22:12:01 +00:00
|
|
|
/* Bump action_in_progress to prevent other modules from completion of modules_init
|
|
|
|
* Send message to defer application shutdown until resources are cleaned up */
|
|
|
|
module->internal.action_in_progress = 1;
|
|
|
|
spdk_thread_send_msg(spdk_get_thread(), spdk_bdev_init_failed, module);
|
2018-08-02 15:57:18 +00:00
|
|
|
return rc;
|
2017-07-13 17:36:19 +00:00
|
|
|
}
|
2017-05-26 04:58:04 +00:00
|
|
|
}
|
|
|
|
|
2018-08-02 15:57:18 +00:00
|
|
|
g_resume_bdev_module = NULL;
|
|
|
|
return 0;
|
2017-05-26 04:58:04 +00:00
|
|
|
}
|
2018-06-11 12:35:11 +00:00
|
|
|
|
2017-06-15 19:01:53 +00:00
|
|
|
void
|
2017-11-17 21:49:36 +00:00
|
|
|
spdk_bdev_initialize(spdk_bdev_init_cb cb_fn, void *cb_arg)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2018-06-11 14:35:48 +00:00
|
|
|
struct spdk_conf_section *sp;
|
|
|
|
struct spdk_bdev_opts bdev_opts;
|
|
|
|
int32_t bdev_io_pool_size, bdev_io_cache_size;
|
2017-05-10 21:42:45 +00:00
|
|
|
int cache_size;
|
2017-05-09 21:27:36 +00:00
|
|
|
int rc = 0;
|
2017-08-17 02:01:54 +00:00
|
|
|
char mempool_name[32];
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-06-14 23:37:15 +00:00
|
|
|
assert(cb_fn != NULL);
|
|
|
|
|
2018-06-11 14:35:48 +00:00
|
|
|
sp = spdk_conf_find_section(NULL, "Bdev");
|
|
|
|
if (sp != NULL) {
|
|
|
|
spdk_bdev_get_opts(&bdev_opts);
|
|
|
|
|
|
|
|
bdev_io_pool_size = spdk_conf_section_get_intval(sp, "BdevIoPoolSize");
|
|
|
|
if (bdev_io_pool_size >= 0) {
|
|
|
|
bdev_opts.bdev_io_pool_size = bdev_io_pool_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
bdev_io_cache_size = spdk_conf_section_get_intval(sp, "BdevIoCacheSize");
|
|
|
|
if (bdev_io_cache_size >= 0) {
|
|
|
|
bdev_opts.bdev_io_cache_size = bdev_io_cache_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (spdk_bdev_set_opts(&bdev_opts)) {
|
|
|
|
spdk_bdev_init_complete(-1);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(memcmp(&bdev_opts, &g_bdev_opts, sizeof(bdev_opts)) == 0);
|
|
|
|
}
|
|
|
|
|
2017-10-25 13:58:02 +00:00
|
|
|
g_init_cb_fn = cb_fn;
|
|
|
|
g_init_cb_arg = cb_arg;
|
2017-06-14 23:37:15 +00:00
|
|
|
|
2019-03-22 11:19:42 +00:00
|
|
|
spdk_notify_type_register("bdev_register");
|
|
|
|
spdk_notify_type_register("bdev_unregister");
|
|
|
|
|
2017-08-17 02:01:54 +00:00
|
|
|
snprintf(mempool_name, sizeof(mempool_name), "bdev_io_%d", getpid());
|
|
|
|
|
|
|
|
g_bdev_mgr.bdev_io_pool = spdk_mempool_create(mempool_name,
|
2018-06-11 12:35:11 +00:00
|
|
|
g_bdev_opts.bdev_io_pool_size,
|
2017-05-09 21:09:28 +00:00
|
|
|
sizeof(struct spdk_bdev_io) +
|
|
|
|
spdk_bdev_module_get_max_ctx_size(),
|
2017-12-20 15:20:23 +00:00
|
|
|
0,
|
2017-05-09 21:09:28 +00:00
|
|
|
SPDK_ENV_SOCKET_ID_ANY);
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-05-09 21:09:28 +00:00
|
|
|
if (g_bdev_mgr.bdev_io_pool == NULL) {
|
2017-08-18 13:22:25 +00:00
|
|
|
SPDK_ERRLOG("could not allocate spdk_bdev_io pool\n");
|
2017-08-24 11:42:06 +00:00
|
|
|
spdk_bdev_init_complete(-1);
|
2017-07-13 17:36:19 +00:00
|
|
|
return;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-05-09 21:27:36 +00:00
|
|
|
/**
|
|
|
|
* Ensure no more than half of the total buffers end up local caches, by
|
2018-06-11 13:47:30 +00:00
|
|
|
* using spdk_thread_get_count() to determine how many local caches we need
|
2017-05-09 21:27:36 +00:00
|
|
|
* to account for.
|
|
|
|
*/
|
2018-06-11 13:47:30 +00:00
|
|
|
cache_size = BUF_SMALL_POOL_SIZE / (2 * spdk_thread_get_count());
|
2017-08-17 02:01:54 +00:00
|
|
|
snprintf(mempool_name, sizeof(mempool_name), "buf_small_pool_%d", getpid());
|
|
|
|
|
|
|
|
g_bdev_mgr.buf_small_pool = spdk_mempool_create(mempool_name,
|
2017-05-09 21:27:36 +00:00
|
|
|
BUF_SMALL_POOL_SIZE,
|
2019-03-05 01:10:53 +00:00
|
|
|
SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_SMALL_BUF_MAX_SIZE) +
|
|
|
|
SPDK_BDEV_POOL_ALIGNMENT,
|
2017-05-09 21:27:36 +00:00
|
|
|
cache_size,
|
|
|
|
SPDK_ENV_SOCKET_ID_ANY);
|
|
|
|
if (!g_bdev_mgr.buf_small_pool) {
|
|
|
|
SPDK_ERRLOG("create rbuf small pool failed\n");
|
2017-08-24 11:42:06 +00:00
|
|
|
spdk_bdev_init_complete(-1);
|
2017-07-13 17:36:19 +00:00
|
|
|
return;
|
2017-05-09 21:27:36 +00:00
|
|
|
}
|
|
|
|
|
2018-06-11 13:47:30 +00:00
|
|
|
cache_size = BUF_LARGE_POOL_SIZE / (2 * spdk_thread_get_count());
|
2017-08-17 02:01:54 +00:00
|
|
|
snprintf(mempool_name, sizeof(mempool_name), "buf_large_pool_%d", getpid());
|
|
|
|
|
|
|
|
g_bdev_mgr.buf_large_pool = spdk_mempool_create(mempool_name,
|
2017-05-09 21:27:36 +00:00
|
|
|
BUF_LARGE_POOL_SIZE,
|
2019-03-05 01:10:53 +00:00
|
|
|
SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_LARGE_BUF_MAX_SIZE) +
|
|
|
|
SPDK_BDEV_POOL_ALIGNMENT,
|
2017-05-09 21:27:36 +00:00
|
|
|
cache_size,
|
|
|
|
SPDK_ENV_SOCKET_ID_ANY);
|
|
|
|
if (!g_bdev_mgr.buf_large_pool) {
|
|
|
|
SPDK_ERRLOG("create rbuf large pool failed\n");
|
2017-08-24 11:42:06 +00:00
|
|
|
spdk_bdev_init_complete(-1);
|
2017-07-13 17:36:19 +00:00
|
|
|
return;
|
2017-05-09 21:27:36 +00:00
|
|
|
}
|
|
|
|
|
2019-04-05 08:46:35 +00:00
|
|
|
g_bdev_mgr.zero_buffer = spdk_zmalloc(ZERO_BUFFER_SIZE, ZERO_BUFFER_SIZE,
|
|
|
|
NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
|
2017-07-28 22:34:24 +00:00
|
|
|
if (!g_bdev_mgr.zero_buffer) {
|
|
|
|
SPDK_ERRLOG("create bdev zero buffer failed\n");
|
|
|
|
spdk_bdev_init_complete(-1);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2017-05-30 08:45:46 +00:00
|
|
|
#ifdef SPDK_CONFIG_VTUNE
|
|
|
|
g_bdev_mgr.domain = __itt_domain_create("spdk_bdev");
|
|
|
|
#endif
|
|
|
|
|
2017-05-09 22:07:56 +00:00
|
|
|
spdk_io_device_register(&g_bdev_mgr, spdk_bdev_mgmt_channel_create,
|
|
|
|
spdk_bdev_mgmt_channel_destroy,
|
2018-08-30 20:26:50 +00:00
|
|
|
sizeof(struct spdk_bdev_mgmt_channel),
|
|
|
|
"bdev_mgr");
|
2017-05-09 22:07:56 +00:00
|
|
|
|
2017-07-13 17:36:19 +00:00
|
|
|
rc = spdk_bdev_modules_init();
|
2018-08-02 15:57:18 +00:00
|
|
|
g_bdev_mgr.module_init_complete = true;
|
2017-08-25 07:30:10 +00:00
|
|
|
if (rc != 0) {
|
|
|
|
SPDK_ERRLOG("bdev modules init failed\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
spdk_bdev_module_action_complete();
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-10-25 13:58:02 +00:00
|
|
|
static void
|
2018-04-06 20:27:34 +00:00
|
|
|
spdk_bdev_mgr_unregister_cb(void *io_device)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2017-10-25 13:58:02 +00:00
|
|
|
spdk_bdev_fini_cb cb_fn = g_fini_cb_fn;
|
2017-05-09 21:50:43 +00:00
|
|
|
|
2018-06-11 12:35:11 +00:00
|
|
|
if (spdk_mempool_count(g_bdev_mgr.bdev_io_pool) != g_bdev_opts.bdev_io_pool_size) {
|
2018-04-18 12:26:10 +00:00
|
|
|
SPDK_ERRLOG("bdev IO pool count is %zu but should be %u\n",
|
|
|
|
spdk_mempool_count(g_bdev_mgr.bdev_io_pool),
|
2018-06-11 12:35:11 +00:00
|
|
|
g_bdev_opts.bdev_io_pool_size);
|
2018-04-18 12:26:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (spdk_mempool_count(g_bdev_mgr.buf_small_pool) != BUF_SMALL_POOL_SIZE) {
|
|
|
|
SPDK_ERRLOG("Small buffer pool count is %zu but should be %u\n",
|
|
|
|
spdk_mempool_count(g_bdev_mgr.buf_small_pool),
|
|
|
|
BUF_SMALL_POOL_SIZE);
|
|
|
|
assert(false);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (spdk_mempool_count(g_bdev_mgr.buf_large_pool) != BUF_LARGE_POOL_SIZE) {
|
|
|
|
SPDK_ERRLOG("Large buffer pool count is %zu but should be %u\n",
|
|
|
|
spdk_mempool_count(g_bdev_mgr.buf_large_pool),
|
|
|
|
BUF_LARGE_POOL_SIZE);
|
|
|
|
assert(false);
|
|
|
|
}
|
|
|
|
|
|
|
|
spdk_mempool_free(g_bdev_mgr.bdev_io_pool);
|
|
|
|
spdk_mempool_free(g_bdev_mgr.buf_small_pool);
|
|
|
|
spdk_mempool_free(g_bdev_mgr.buf_large_pool);
|
2019-04-05 08:46:35 +00:00
|
|
|
spdk_free(g_bdev_mgr.zero_buffer);
|
2018-04-18 12:26:10 +00:00
|
|
|
|
2017-10-25 13:58:02 +00:00
|
|
|
cb_fn(g_fini_cb_arg);
|
|
|
|
g_fini_cb_fn = NULL;
|
|
|
|
g_fini_cb_arg = NULL;
|
2018-09-26 10:33:05 +00:00
|
|
|
g_bdev_mgr.init_complete = false;
|
|
|
|
g_bdev_mgr.module_init_complete = false;
|
2019-06-18 12:18:32 +00:00
|
|
|
pthread_mutex_destroy(&g_bdev_mgr.mutex);
|
2017-10-25 13:58:02 +00:00
|
|
|
}
|
2017-05-09 21:50:43 +00:00
|
|
|
|
2017-10-25 13:58:02 +00:00
|
|
|
static void
|
2017-11-10 18:16:51 +00:00
|
|
|
spdk_bdev_module_finish_iter(void *arg)
|
2017-10-25 13:58:02 +00:00
|
|
|
{
|
2018-03-09 22:20:21 +00:00
|
|
|
struct spdk_bdev_module *bdev_module;
|
2017-11-10 18:16:51 +00:00
|
|
|
|
2019-07-15 08:17:14 +00:00
|
|
|
/* FIXME: Handling initialization failures is broken now,
|
|
|
|
* so we won't even try cleaning up after successfully
|
|
|
|
* initialized modules. if module_init_complete is false,
|
|
|
|
* just call spdk_bdev_mgr_unregister_cb
|
|
|
|
*/
|
|
|
|
if (!g_bdev_mgr.module_init_complete) {
|
|
|
|
spdk_bdev_mgr_unregister_cb(NULL);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2017-11-10 18:16:51 +00:00
|
|
|
/* Start iterating from the last touched module */
|
2018-04-06 20:27:34 +00:00
|
|
|
if (!g_resume_bdev_module) {
|
2018-08-02 15:57:18 +00:00
|
|
|
bdev_module = TAILQ_LAST(&g_bdev_mgr.bdev_modules, bdev_module_list);
|
2017-11-10 18:16:51 +00:00
|
|
|
} else {
|
2018-08-02 15:57:18 +00:00
|
|
|
bdev_module = TAILQ_PREV(g_resume_bdev_module, bdev_module_list,
|
|
|
|
internal.tailq);
|
2017-11-10 18:16:51 +00:00
|
|
|
}
|
|
|
|
|
2018-05-04 07:58:01 +00:00
|
|
|
while (bdev_module) {
|
|
|
|
if (bdev_module->async_fini) {
|
|
|
|
/* Save our place so we can resume later. We must
|
|
|
|
* save the variable here, before calling module_fini()
|
|
|
|
* below, because in some cases the module may immediately
|
|
|
|
* call spdk_bdev_module_finish_done() and re-enter
|
|
|
|
* this function to continue iterating. */
|
|
|
|
g_resume_bdev_module = bdev_module;
|
|
|
|
}
|
2017-11-10 18:16:51 +00:00
|
|
|
|
|
|
|
if (bdev_module->module_fini) {
|
|
|
|
bdev_module->module_fini();
|
|
|
|
}
|
|
|
|
|
2018-05-04 07:58:01 +00:00
|
|
|
if (bdev_module->async_fini) {
|
|
|
|
return;
|
2017-11-10 18:16:51 +00:00
|
|
|
}
|
2017-10-25 13:58:02 +00:00
|
|
|
|
2018-08-02 15:57:18 +00:00
|
|
|
bdev_module = TAILQ_PREV(bdev_module, bdev_module_list,
|
|
|
|
internal.tailq);
|
2017-11-10 18:16:51 +00:00
|
|
|
}
|
|
|
|
|
2018-04-06 20:27:34 +00:00
|
|
|
g_resume_bdev_module = NULL;
|
|
|
|
spdk_io_device_unregister(&g_bdev_mgr, spdk_bdev_mgr_unregister_cb);
|
2018-04-10 21:27:45 +00:00
|
|
|
}
|
|
|
|
|
2018-05-04 07:58:01 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_module_finish_done(void)
|
2017-10-25 13:58:02 +00:00
|
|
|
{
|
|
|
|
if (spdk_get_thread() != g_fini_thread) {
|
2017-11-10 18:16:51 +00:00
|
|
|
spdk_thread_send_msg(g_fini_thread, spdk_bdev_module_finish_iter, NULL);
|
2017-10-25 13:58:02 +00:00
|
|
|
} else {
|
2017-11-10 18:16:51 +00:00
|
|
|
spdk_bdev_module_finish_iter(NULL);
|
2017-10-25 13:58:02 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-11-21 17:45:05 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_finish_unregister_bdevs_iter(void *cb_arg, int bdeverrno)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev = cb_arg;
|
|
|
|
|
|
|
|
if (bdeverrno && bdev) {
|
|
|
|
SPDK_WARNLOG("Unable to unregister bdev '%s' during spdk_bdev_finish()\n",
|
|
|
|
bdev->name);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Since the call to spdk_bdev_unregister() failed, we have no way to free this
|
|
|
|
* bdev; try to continue by manually removing this bdev from the list and continue
|
|
|
|
* with the next bdev in the list.
|
|
|
|
*/
|
2018-06-21 20:03:02 +00:00
|
|
|
TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, internal.link);
|
2017-11-21 17:45:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (TAILQ_EMPTY(&g_bdev_mgr.bdevs)) {
|
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Done unregistering bdevs\n");
|
2018-03-26 19:19:04 +00:00
|
|
|
/*
|
2018-11-06 12:02:41 +00:00
|
|
|
* Bdev module finish need to be deferred as we might be in the middle of some context
|
2018-03-26 19:19:04 +00:00
|
|
|
* (like bdev part free) that will use this bdev (or private bdev driver ctx data)
|
|
|
|
* after returning.
|
|
|
|
*/
|
|
|
|
spdk_thread_send_msg(spdk_get_thread(), spdk_bdev_module_finish_iter, NULL);
|
2017-11-21 17:45:05 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2018-11-06 12:02:41 +00:00
|
|
|
* Unregister last unclaimed bdev in the list, to ensure that bdev subsystem
|
|
|
|
* shutdown proceeds top-down. The goal is to give virtual bdevs an opportunity
|
|
|
|
* to detect clean shutdown as opposed to run-time hot removal of the underlying
|
|
|
|
* base bdevs.
|
|
|
|
*
|
|
|
|
* Also, walk the list in the reverse order.
|
2017-11-21 17:45:05 +00:00
|
|
|
*/
|
2018-11-06 12:02:41 +00:00
|
|
|
for (bdev = TAILQ_LAST(&g_bdev_mgr.bdevs, spdk_bdev_list);
|
|
|
|
bdev; bdev = TAILQ_PREV(bdev, spdk_bdev_list, internal.link)) {
|
|
|
|
if (bdev->internal.claim_module != NULL) {
|
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Skipping claimed bdev '%s'(<-'%s').\n",
|
|
|
|
bdev->name, bdev->internal.claim_module->name);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Unregistering bdev '%s'\n", bdev->name);
|
|
|
|
spdk_bdev_unregister(bdev, _spdk_bdev_finish_unregister_bdevs_iter, bdev);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If any bdev fails to unclaim underlying bdev properly, we may face the
|
|
|
|
* case of bdev list consisting of claimed bdevs only (if claims are managed
|
|
|
|
* correctly, this would mean there's a loop in the claims graph which is
|
|
|
|
* clearly impossible). Warn and unregister last bdev on the list then.
|
|
|
|
*/
|
|
|
|
for (bdev = TAILQ_LAST(&g_bdev_mgr.bdevs, spdk_bdev_list);
|
|
|
|
bdev; bdev = TAILQ_PREV(bdev, spdk_bdev_list, internal.link)) {
|
2019-07-31 14:22:23 +00:00
|
|
|
SPDK_WARNLOG("Unregistering claimed bdev '%s'!\n", bdev->name);
|
2018-11-06 12:02:41 +00:00
|
|
|
spdk_bdev_unregister(bdev, _spdk_bdev_finish_unregister_bdevs_iter, bdev);
|
|
|
|
return;
|
|
|
|
}
|
2017-11-21 17:45:05 +00:00
|
|
|
}
|
|
|
|
|
2017-10-25 13:58:02 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_finish(spdk_bdev_fini_cb cb_fn, void *cb_arg)
|
|
|
|
{
|
2018-08-02 18:20:56 +00:00
|
|
|
struct spdk_bdev_module *m;
|
|
|
|
|
2017-10-25 13:58:02 +00:00
|
|
|
assert(cb_fn != NULL);
|
|
|
|
|
|
|
|
g_fini_thread = spdk_get_thread();
|
|
|
|
|
|
|
|
g_fini_cb_fn = cb_fn;
|
|
|
|
g_fini_cb_arg = cb_arg;
|
|
|
|
|
2018-08-02 18:20:56 +00:00
|
|
|
TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, internal.tailq) {
|
|
|
|
if (m->fini_start) {
|
|
|
|
m->fini_start();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-04-06 21:07:15 +00:00
|
|
|
_spdk_bdev_finish_unregister_bdevs_iter(NULL, 0);
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-12-20 15:29:52 +00:00
|
|
|
static struct spdk_bdev_io *
|
2018-04-06 20:59:07 +00:00
|
|
|
spdk_bdev_get_io(struct spdk_bdev_channel *channel)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2018-05-04 08:10:52 +00:00
|
|
|
struct spdk_bdev_mgmt_channel *ch = channel->shared_resource->mgmt_ch;
|
2016-07-20 18:16:23 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
|
|
|
|
2017-12-20 15:20:23 +00:00
|
|
|
if (ch->per_thread_cache_count > 0) {
|
2018-01-05 21:55:38 +00:00
|
|
|
bdev_io = STAILQ_FIRST(&ch->per_thread_cache);
|
2018-05-31 17:06:30 +00:00
|
|
|
STAILQ_REMOVE_HEAD(&ch->per_thread_cache, internal.buf_link);
|
2017-12-20 15:20:23 +00:00
|
|
|
ch->per_thread_cache_count--;
|
2018-06-12 15:11:31 +00:00
|
|
|
} else if (spdk_unlikely(!TAILQ_EMPTY(&ch->io_wait_queue))) {
|
|
|
|
/*
|
|
|
|
* Don't try to look for bdev_ios in the global pool if there are
|
|
|
|
* waiters on bdev_ios - we don't want this caller to jump the line.
|
|
|
|
*/
|
|
|
|
bdev_io = NULL;
|
2017-12-20 15:20:23 +00:00
|
|
|
} else {
|
|
|
|
bdev_io = spdk_mempool_get(g_bdev_mgr.bdev_io_pool);
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return bdev_io;
|
|
|
|
}
|
|
|
|
|
2018-05-31 16:28:45 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_free_io(struct spdk_bdev_io *bdev_io)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2019-03-27 15:42:34 +00:00
|
|
|
struct spdk_bdev_mgmt_channel *ch;
|
2017-12-20 15:20:23 +00:00
|
|
|
|
2018-05-31 16:28:45 +00:00
|
|
|
assert(bdev_io != NULL);
|
2018-06-19 23:19:49 +00:00
|
|
|
assert(bdev_io->internal.status != SPDK_BDEV_IO_STATUS_PENDING);
|
2018-05-31 16:28:45 +00:00
|
|
|
|
2019-03-27 15:42:34 +00:00
|
|
|
ch = bdev_io->internal.ch->shared_resource->mgmt_ch;
|
|
|
|
|
2018-06-19 22:08:31 +00:00
|
|
|
if (bdev_io->internal.buf != NULL) {
|
2017-05-05 20:15:51 +00:00
|
|
|
spdk_bdev_io_put_buf(bdev_io);
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2018-06-11 12:35:11 +00:00
|
|
|
if (ch->per_thread_cache_count < ch->bdev_io_cache_size) {
|
2017-12-20 15:20:23 +00:00
|
|
|
ch->per_thread_cache_count++;
|
2019-02-04 17:28:42 +00:00
|
|
|
STAILQ_INSERT_HEAD(&ch->per_thread_cache, bdev_io, internal.buf_link);
|
2018-06-12 15:11:31 +00:00
|
|
|
while (ch->per_thread_cache_count > 0 && !TAILQ_EMPTY(&ch->io_wait_queue)) {
|
|
|
|
struct spdk_bdev_io_wait_entry *entry;
|
|
|
|
|
|
|
|
entry = TAILQ_FIRST(&ch->io_wait_queue);
|
|
|
|
TAILQ_REMOVE(&ch->io_wait_queue, entry, link);
|
|
|
|
entry->cb_fn(entry->cb_arg);
|
|
|
|
}
|
2017-12-20 15:20:23 +00:00
|
|
|
} else {
|
2018-06-12 15:11:31 +00:00
|
|
|
/* We should never have a full cache with entries on the io wait queue. */
|
|
|
|
assert(TAILQ_EMPTY(&ch->io_wait_queue));
|
2017-12-20 15:20:23 +00:00
|
|
|
spdk_mempool_put(g_bdev_mgr.bdev_io_pool, (void *)bdev_io);
|
|
|
|
}
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2018-09-04 15:01:51 +00:00
|
|
|
static bool
|
|
|
|
_spdk_bdev_qos_is_iops_rate_limit(enum spdk_bdev_qos_rate_limit_type limit)
|
|
|
|
{
|
|
|
|
assert(limit != SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES);
|
|
|
|
|
|
|
|
switch (limit) {
|
|
|
|
case SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT:
|
|
|
|
return true;
|
|
|
|
case SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT:
|
2018-06-22 02:15:02 +00:00
|
|
|
case SPDK_BDEV_QOS_R_BPS_RATE_LIMIT:
|
|
|
|
case SPDK_BDEV_QOS_W_BPS_RATE_LIMIT:
|
2018-09-04 15:01:51 +00:00
|
|
|
return false;
|
|
|
|
case SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES:
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool
|
|
|
|
_spdk_bdev_qos_io_to_limit(struct spdk_bdev_io *bdev_io)
|
|
|
|
{
|
|
|
|
switch (bdev_io->type) {
|
|
|
|
case SPDK_BDEV_IO_TYPE_NVME_IO:
|
|
|
|
case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
|
|
|
|
case SPDK_BDEV_IO_TYPE_READ:
|
|
|
|
case SPDK_BDEV_IO_TYPE_WRITE:
|
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-06-22 02:15:02 +00:00
|
|
|
static bool
|
|
|
|
_spdk_bdev_is_read_io(struct spdk_bdev_io *bdev_io)
|
|
|
|
{
|
|
|
|
switch (bdev_io->type) {
|
|
|
|
case SPDK_BDEV_IO_TYPE_NVME_IO:
|
|
|
|
case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
|
|
|
|
/* Bit 1 (0x2) set for read operation */
|
|
|
|
if (bdev_io->u.nvme_passthru.cmd.opc & SPDK_NVME_OPC_READ) {
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
case SPDK_BDEV_IO_TYPE_READ:
|
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-06-05 08:21:14 +00:00
|
|
|
static uint64_t
|
|
|
|
_spdk_bdev_get_io_size_in_byte(struct spdk_bdev_io *bdev_io)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev = bdev_io->bdev;
|
|
|
|
|
|
|
|
switch (bdev_io->type) {
|
|
|
|
case SPDK_BDEV_IO_TYPE_NVME_IO:
|
|
|
|
case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
|
|
|
|
return bdev_io->u.nvme_passthru.nbytes;
|
|
|
|
case SPDK_BDEV_IO_TYPE_READ:
|
|
|
|
case SPDK_BDEV_IO_TYPE_WRITE:
|
|
|
|
return bdev_io->u.bdev.num_blocks * bdev->blocklen;
|
|
|
|
default:
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-12-24 21:46:42 +00:00
|
|
|
static bool
|
|
|
|
_spdk_bdev_qos_rw_queue_io(const struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
|
|
|
|
{
|
|
|
|
if (limit->max_per_timeslice > 0 && limit->remaining_this_timeslice <= 0) {
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-06-22 02:15:02 +00:00
|
|
|
/*
 * queue_io callback for a read-only limit: non-read I/O is never held back;
 * read I/O is held back under the same rule as the read/write limit.
 */
static bool
_spdk_bdev_qos_r_queue_io(const struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
{
	return _spdk_bdev_is_read_io(io) && _spdk_bdev_qos_rw_queue_io(limit, io);
}
|
|
|
|
|
|
|
|
/*
 * queue_io callback for a write-only limit: read I/O is never held back;
 * any other I/O is held back under the same rule as the read/write limit.
 */
static bool
_spdk_bdev_qos_w_queue_io(const struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
{
	return !_spdk_bdev_is_read_io(io) && _spdk_bdev_qos_rw_queue_io(limit, io);
}
|
|
|
|
|
2018-12-24 21:46:42 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_qos_rw_iops_update_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
|
|
|
|
{
|
|
|
|
limit->remaining_this_timeslice--;
|
|
|
|
}
|
|
|
|
|
2018-09-04 15:01:51 +00:00
|
|
|
static void
|
2018-12-24 21:46:42 +00:00
|
|
|
_spdk_bdev_qos_rw_bps_update_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
|
|
|
|
{
|
|
|
|
limit->remaining_this_timeslice -= _spdk_bdev_get_io_size_in_byte(io);
|
|
|
|
}
|
|
|
|
|
2018-06-22 02:15:02 +00:00
|
|
|
/*
 * update_quota callback for the read bandwidth limit: only read I/O is
 * charged (by its size in bytes); any other I/O leaves the quota untouched.
 */
static void
_spdk_bdev_qos_r_bps_update_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
{
	/*
	 * Call the helper as a plain statement: ISO C does not allow a return
	 * statement with an expression in a function returning void.
	 */
	if (_spdk_bdev_is_read_io(io)) {
		_spdk_bdev_qos_rw_bps_update_quota(limit, io);
	}
}
|
|
|
|
|
|
|
|
/*
 * update_quota callback for the write bandwidth limit: read I/O is not
 * charged; any other I/O is charged by its size in bytes.
 */
static void
_spdk_bdev_qos_w_bps_update_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
{
	/*
	 * Call the helper as a plain statement: ISO C does not allow a return
	 * statement with an expression in a function returning void.
	 */
	if (!_spdk_bdev_is_read_io(io)) {
		_spdk_bdev_qos_rw_bps_update_quota(limit, io);
	}
}
|
|
|
|
|
2018-12-24 21:46:42 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_qos_set_ops(struct spdk_bdev_qos *qos)
|
2018-09-04 15:01:51 +00:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
|
|
|
|
if (qos->rate_limits[i].limit == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
|
2018-12-24 21:46:42 +00:00
|
|
|
qos->rate_limits[i].queue_io = NULL;
|
|
|
|
qos->rate_limits[i].update_quota = NULL;
|
2018-09-04 15:01:51 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (i) {
|
|
|
|
case SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT:
|
2018-12-24 21:46:42 +00:00
|
|
|
qos->rate_limits[i].queue_io = _spdk_bdev_qos_rw_queue_io;
|
|
|
|
qos->rate_limits[i].update_quota = _spdk_bdev_qos_rw_iops_update_quota;
|
2018-09-04 15:01:51 +00:00
|
|
|
break;
|
|
|
|
case SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT:
|
2018-12-24 21:46:42 +00:00
|
|
|
qos->rate_limits[i].queue_io = _spdk_bdev_qos_rw_queue_io;
|
|
|
|
qos->rate_limits[i].update_quota = _spdk_bdev_qos_rw_bps_update_quota;
|
2018-09-04 15:01:51 +00:00
|
|
|
break;
|
2018-06-22 02:15:02 +00:00
|
|
|
case SPDK_BDEV_QOS_R_BPS_RATE_LIMIT:
|
|
|
|
qos->rate_limits[i].queue_io = _spdk_bdev_qos_r_queue_io;
|
|
|
|
qos->rate_limits[i].update_quota = _spdk_bdev_qos_r_bps_update_quota;
|
|
|
|
break;
|
|
|
|
case SPDK_BDEV_QOS_W_BPS_RATE_LIMIT:
|
|
|
|
qos->rate_limits[i].queue_io = _spdk_bdev_qos_w_queue_io;
|
|
|
|
qos->rate_limits[i].update_quota = _spdk_bdev_qos_w_bps_update_quota;
|
|
|
|
break;
|
2018-09-04 15:01:51 +00:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-05-21 13:18:56 +00:00
|
|
|
static inline void
|
|
|
|
_spdk_bdev_io_do_submit(struct spdk_bdev_channel *bdev_ch, struct spdk_bdev_io *bdev_io)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev = bdev_io->bdev;
|
|
|
|
struct spdk_io_channel *ch = bdev_ch->channel;
|
|
|
|
struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource;
|
|
|
|
|
|
|
|
if (spdk_likely(TAILQ_EMPTY(&shared_resource->nomem_io))) {
|
|
|
|
bdev_ch->io_outstanding++;
|
|
|
|
shared_resource->io_outstanding++;
|
|
|
|
bdev_io->internal.in_submit_request = true;
|
|
|
|
bdev->fn_table->submit_request(ch, bdev_io);
|
|
|
|
bdev_io->internal.in_submit_request = false;
|
|
|
|
} else {
|
|
|
|
TAILQ_INSERT_TAIL(&shared_resource->nomem_io, bdev_io, internal.link);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-10-10 17:04:50 +00:00
|
|
|
static int
|
2018-10-09 05:10:33 +00:00
|
|
|
_spdk_bdev_qos_io_submit(struct spdk_bdev_channel *ch, struct spdk_bdev_qos *qos)
|
2017-12-28 03:11:55 +00:00
|
|
|
{
|
2018-12-24 21:46:42 +00:00
|
|
|
struct spdk_bdev_io *bdev_io = NULL, *tmp = NULL;
|
2018-10-10 17:04:50 +00:00
|
|
|
int i, submitted_ios = 0;
|
2018-12-24 21:46:42 +00:00
|
|
|
|
|
|
|
TAILQ_FOREACH_SAFE(bdev_io, &qos->queued, internal.link, tmp) {
|
|
|
|
if (_spdk_bdev_qos_io_to_limit(bdev_io) == true) {
|
|
|
|
for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
|
|
|
|
if (!qos->rate_limits[i].queue_io) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (qos->rate_limits[i].queue_io(&qos->rate_limits[i],
|
|
|
|
bdev_io) == true) {
|
|
|
|
return submitted_ios;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
|
|
|
|
if (!qos->rate_limits[i].update_quota) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
qos->rate_limits[i].update_quota(&qos->rate_limits[i], bdev_io);
|
2018-09-04 15:01:51 +00:00
|
|
|
}
|
2017-12-28 03:11:55 +00:00
|
|
|
}
|
2018-06-05 08:21:14 +00:00
|
|
|
|
2018-06-20 20:58:59 +00:00
|
|
|
TAILQ_REMOVE(&qos->queued, bdev_io, internal.link);
|
2019-05-21 13:18:56 +00:00
|
|
|
_spdk_bdev_io_do_submit(ch, bdev_io);
|
2018-10-10 17:04:50 +00:00
|
|
|
submitted_ios++;
|
2017-12-28 03:11:55 +00:00
|
|
|
}
|
2018-10-10 17:04:50 +00:00
|
|
|
|
|
|
|
return submitted_ios;
|
2017-12-28 03:11:55 +00:00
|
|
|
}
|
|
|
|
|
2018-09-27 07:36:37 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_queue_io_wait_with_cb(struct spdk_bdev_io *bdev_io, spdk_bdev_io_wait_cb cb_fn)
|
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
bdev_io->internal.waitq_entry.bdev = bdev_io->bdev;
|
|
|
|
bdev_io->internal.waitq_entry.cb_fn = cb_fn;
|
|
|
|
bdev_io->internal.waitq_entry.cb_arg = bdev_io;
|
|
|
|
rc = spdk_bdev_queue_io_wait(bdev_io->bdev, spdk_io_channel_from_ctx(bdev_io->internal.ch),
|
|
|
|
&bdev_io->internal.waitq_entry);
|
|
|
|
if (rc != 0) {
|
|
|
|
SPDK_ERRLOG("Queue IO failed, rc=%d\n", rc);
|
|
|
|
bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
|
|
|
|
bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-08-17 18:04:04 +00:00
|
|
|
static bool
|
|
|
|
_spdk_bdev_io_type_can_split(uint8_t type)
|
|
|
|
{
|
|
|
|
assert(type != SPDK_BDEV_IO_TYPE_INVALID);
|
|
|
|
assert(type < SPDK_BDEV_NUM_IO_TYPES);
|
|
|
|
|
2018-08-30 15:06:54 +00:00
|
|
|
/* Only split READ and WRITE I/O. Theoretically other types of I/O like
|
|
|
|
* UNMAP could be split, but these types of I/O are typically much larger
|
|
|
|
* in size (sometimes the size of the entire block device), and the bdev
|
|
|
|
* module can more efficiently split these types of I/O. Plus those types
|
|
|
|
* of I/O do not have a payload, which makes the splitting process simpler.
|
|
|
|
*/
|
|
|
|
if (type == SPDK_BDEV_IO_TYPE_READ || type == SPDK_BDEV_IO_TYPE_WRITE) {
|
2018-08-17 18:04:04 +00:00
|
|
|
return true;
|
2018-08-30 15:06:54 +00:00
|
|
|
} else {
|
|
|
|
return false;
|
2018-08-17 18:04:04 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool
|
2018-08-30 15:06:54 +00:00
|
|
|
_spdk_bdev_io_should_split(struct spdk_bdev_io *bdev_io)
|
2018-08-17 18:04:04 +00:00
|
|
|
{
|
|
|
|
uint64_t start_stripe, end_stripe;
|
|
|
|
uint32_t io_boundary = bdev_io->bdev->optimal_io_boundary;
|
|
|
|
|
|
|
|
if (io_boundary == 0) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!_spdk_bdev_io_type_can_split(bdev_io->type)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
start_stripe = bdev_io->u.bdev.offset_blocks;
|
|
|
|
end_stripe = start_stripe + bdev_io->u.bdev.num_blocks - 1;
|
|
|
|
/* Avoid expensive div operations if possible. These spdk_u32 functions are very cheap. */
|
|
|
|
if (spdk_likely(spdk_u32_is_pow2(io_boundary))) {
|
|
|
|
start_stripe >>= spdk_u32log2(io_boundary);
|
|
|
|
end_stripe >>= spdk_u32log2(io_boundary);
|
|
|
|
} else {
|
|
|
|
start_stripe /= io_boundary;
|
|
|
|
end_stripe /= io_boundary;
|
|
|
|
}
|
|
|
|
return (start_stripe != end_stripe);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Distance from offset to the next multiple of boundary.
 *
 * An offset that already sits on a boundary yields a full boundary, not 0.
 * boundary must be non-zero.
 */
static uint32_t
_to_next_boundary(uint64_t offset, uint32_t boundary)
{
	uint64_t past_boundary = offset % boundary;

	return (uint32_t)(boundary - past_boundary);
}
|
|
|
|
|
|
|
|
/* Child-I/O completion callback; declared here because _spdk_bdev_io_split() references it. */
static void _spdk_bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success,
				     void *cb_arg);
|
|
|
|
|
|
|
|
static void
|
2019-06-11 02:21:10 +00:00
|
|
|
_spdk_bdev_io_split(void *_bdev_io)
|
2018-08-17 18:04:04 +00:00
|
|
|
{
|
|
|
|
struct spdk_bdev_io *bdev_io = _bdev_io;
|
2018-10-04 07:26:19 +00:00
|
|
|
uint64_t current_offset, remaining;
|
bdev: rewind child offset to last block size aligned iov
Here is the an example to describe existing issue:
There is a Write request with 64KiB data length, and this IO is cross the IO
boundary. We assume that the parent IO will have 2 children requests, one is
33KiB length, the other one is 31KiB. Here is the view of parent iovs, the
first 33KiB length data has 33 iovs:
iov.[0].iov_length = 1024;
.
.
iov.[31].iov_length = 256;
iov.[32].iov_length = 768;
.
.
iov.[64].iov_length = 1024;
In function _spdk_bdev_io_split(), then you can see that for the 33KiB length
child request, exiting code will run out of child child_iov space and return
error due to last one data buffer is not block size aligned.
Here we can rewind the existing offset to last block size aligned buffer to
avoid the error case, for backend which need aligned data buffer such as
AIO backend, the request will go through spdk_bdev_io_get_buf() again to
do the data copy, otherwise for those backend devices such as NVMe with
hardware SGL support, 256 data segment is fine for them.
Change-Id: I96ebdf29829d86f9b38fab28a7406eedc9fa44ef
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/453604
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
2019-06-28 03:05:24 +00:00
|
|
|
uint32_t blocklen, to_next_boundary, to_next_boundary_bytes, to_last_block_bytes;
|
2018-10-04 07:44:55 +00:00
|
|
|
struct iovec *parent_iov, *iov;
|
|
|
|
uint64_t parent_iov_offset, iov_len;
|
|
|
|
uint32_t parent_iovpos, parent_iovcnt, child_iovcnt, iovcnt;
|
2019-04-16 08:12:09 +00:00
|
|
|
void *md_buf = NULL;
|
2018-08-17 18:04:04 +00:00
|
|
|
int rc;
|
|
|
|
|
|
|
|
remaining = bdev_io->u.bdev.split_remaining_num_blocks;
|
|
|
|
current_offset = bdev_io->u.bdev.split_current_offset_blocks;
|
|
|
|
blocklen = bdev_io->bdev->blocklen;
|
2018-10-04 07:26:19 +00:00
|
|
|
parent_iov_offset = (current_offset - bdev_io->u.bdev.offset_blocks) * blocklen;
|
|
|
|
parent_iovcnt = bdev_io->u.bdev.iovcnt;
|
|
|
|
|
|
|
|
for (parent_iovpos = 0; parent_iovpos < parent_iovcnt; parent_iovpos++) {
|
|
|
|
parent_iov = &bdev_io->u.bdev.iovs[parent_iovpos];
|
|
|
|
if (parent_iov_offset < parent_iov->iov_len) {
|
|
|
|
break;
|
2018-08-17 18:04:04 +00:00
|
|
|
}
|
2018-10-04 07:26:19 +00:00
|
|
|
parent_iov_offset -= parent_iov->iov_len;
|
2018-08-17 18:04:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
child_iovcnt = 0;
|
2018-10-04 07:44:55 +00:00
|
|
|
while (remaining > 0 && parent_iovpos < parent_iovcnt && child_iovcnt < BDEV_IO_NUM_CHILD_IOV) {
|
|
|
|
to_next_boundary = _to_next_boundary(current_offset, bdev_io->bdev->optimal_io_boundary);
|
|
|
|
to_next_boundary = spdk_min(remaining, to_next_boundary);
|
|
|
|
to_next_boundary_bytes = to_next_boundary * blocklen;
|
|
|
|
iov = &bdev_io->child_iov[child_iovcnt];
|
|
|
|
iovcnt = 0;
|
2019-04-16 08:12:09 +00:00
|
|
|
|
|
|
|
if (bdev_io->u.bdev.md_buf) {
|
|
|
|
assert((parent_iov_offset % blocklen) > 0);
|
|
|
|
md_buf = (char *)bdev_io->u.bdev.md_buf + (parent_iov_offset / blocklen) *
|
|
|
|
spdk_bdev_get_md_size(bdev_io->bdev);
|
|
|
|
}
|
|
|
|
|
2018-10-04 07:44:55 +00:00
|
|
|
while (to_next_boundary_bytes > 0 && parent_iovpos < parent_iovcnt &&
|
|
|
|
child_iovcnt < BDEV_IO_NUM_CHILD_IOV) {
|
|
|
|
parent_iov = &bdev_io->u.bdev.iovs[parent_iovpos];
|
|
|
|
iov_len = spdk_min(to_next_boundary_bytes, parent_iov->iov_len - parent_iov_offset);
|
|
|
|
to_next_boundary_bytes -= iov_len;
|
|
|
|
|
|
|
|
bdev_io->child_iov[child_iovcnt].iov_base = parent_iov->iov_base + parent_iov_offset;
|
|
|
|
bdev_io->child_iov[child_iovcnt].iov_len = iov_len;
|
|
|
|
|
|
|
|
if (iov_len < parent_iov->iov_len - parent_iov_offset) {
|
|
|
|
parent_iov_offset += iov_len;
|
|
|
|
} else {
|
|
|
|
parent_iovpos++;
|
|
|
|
parent_iov_offset = 0;
|
|
|
|
}
|
|
|
|
child_iovcnt++;
|
|
|
|
iovcnt++;
|
|
|
|
}
|
2018-08-17 18:04:04 +00:00
|
|
|
|
2018-10-04 07:44:55 +00:00
|
|
|
if (to_next_boundary_bytes > 0) {
|
|
|
|
/* We had to stop this child I/O early because we ran out of
|
bdev: rewind child offset to last block size aligned iov
Here is the an example to describe existing issue:
There is a Write request with 64KiB data length, and this IO is cross the IO
boundary. We assume that the parent IO will have 2 children requests, one is
33KiB length, the other one is 31KiB. Here is the view of parent iovs, the
first 33KiB length data has 33 iovs:
iov.[0].iov_length = 1024;
.
.
iov.[31].iov_length = 256;
iov.[32].iov_length = 768;
.
.
iov.[64].iov_length = 1024;
In function _spdk_bdev_io_split(), then you can see that for the 33KiB length
child request, exiting code will run out of child child_iov space and return
error due to last one data buffer is not block size aligned.
Here we can rewind the existing offset to last block size aligned buffer to
avoid the error case, for backend which need aligned data buffer such as
AIO backend, the request will go through spdk_bdev_io_get_buf() again to
do the data copy, otherwise for those backend devices such as NVMe with
hardware SGL support, 256 data segment is fine for them.
Change-Id: I96ebdf29829d86f9b38fab28a7406eedc9fa44ef
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/453604
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
2019-06-28 03:05:24 +00:00
|
|
|
* child_iov space. Ensure the iovs to be aligned with block
|
|
|
|
* size and then adjust to_next_boundary before starting the
|
|
|
|
* child I/O.
|
2018-10-04 07:44:55 +00:00
|
|
|
*/
|
2019-07-01 04:43:05 +00:00
|
|
|
assert(child_iovcnt == BDEV_IO_NUM_CHILD_IOV);
|
bdev: rewind child offset to last block size aligned iov
Here is the an example to describe existing issue:
There is a Write request with 64KiB data length, and this IO is cross the IO
boundary. We assume that the parent IO will have 2 children requests, one is
33KiB length, the other one is 31KiB. Here is the view of parent iovs, the
first 33KiB length data has 33 iovs:
iov.[0].iov_length = 1024;
.
.
iov.[31].iov_length = 256;
iov.[32].iov_length = 768;
.
.
iov.[64].iov_length = 1024;
In function _spdk_bdev_io_split(), then you can see that for the 33KiB length
child request, exiting code will run out of child child_iov space and return
error due to last one data buffer is not block size aligned.
Here we can rewind the existing offset to last block size aligned buffer to
avoid the error case, for backend which need aligned data buffer such as
AIO backend, the request will go through spdk_bdev_io_get_buf() again to
do the data copy, otherwise for those backend devices such as NVMe with
hardware SGL support, 256 data segment is fine for them.
Change-Id: I96ebdf29829d86f9b38fab28a7406eedc9fa44ef
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/453604
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
2019-06-28 03:05:24 +00:00
|
|
|
to_last_block_bytes = to_next_boundary_bytes % blocklen;
|
|
|
|
if (to_last_block_bytes != 0) {
|
2019-07-01 04:43:05 +00:00
|
|
|
uint32_t child_iovpos = child_iovcnt - 1;
|
|
|
|
/* don't decrease child_iovcnt so the loop will naturally end */
|
bdev: rewind child offset to last block size aligned iov
Here is the an example to describe existing issue:
There is a Write request with 64KiB data length, and this IO is cross the IO
boundary. We assume that the parent IO will have 2 children requests, one is
33KiB length, the other one is 31KiB. Here is the view of parent iovs, the
first 33KiB length data has 33 iovs:
iov.[0].iov_length = 1024;
.
.
iov.[31].iov_length = 256;
iov.[32].iov_length = 768;
.
.
iov.[64].iov_length = 1024;
In function _spdk_bdev_io_split(), then you can see that for the 33KiB length
child request, exiting code will run out of child child_iov space and return
error due to last one data buffer is not block size aligned.
Here we can rewind the existing offset to last block size aligned buffer to
avoid the error case, for backend which need aligned data buffer such as
AIO backend, the request will go through spdk_bdev_io_get_buf() again to
do the data copy, otherwise for those backend devices such as NVMe with
hardware SGL support, 256 data segment is fine for them.
Change-Id: I96ebdf29829d86f9b38fab28a7406eedc9fa44ef
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/453604
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
2019-06-28 03:05:24 +00:00
|
|
|
|
2019-07-01 04:43:05 +00:00
|
|
|
to_next_boundary_bytes += _to_next_boundary(to_next_boundary_bytes, blocklen);
|
bdev: rewind child offset to last block size aligned iov
Here is the an example to describe existing issue:
There is a Write request with 64KiB data length, and this IO is cross the IO
boundary. We assume that the parent IO will have 2 children requests, one is
33KiB length, the other one is 31KiB. Here is the view of parent iovs, the
first 33KiB length data has 33 iovs:
iov.[0].iov_length = 1024;
.
.
iov.[31].iov_length = 256;
iov.[32].iov_length = 768;
.
.
iov.[64].iov_length = 1024;
In function _spdk_bdev_io_split(), then you can see that for the 33KiB length
child request, exiting code will run out of child child_iov space and return
error due to last one data buffer is not block size aligned.
Here we can rewind the existing offset to last block size aligned buffer to
avoid the error case, for backend which need aligned data buffer such as
AIO backend, the request will go through spdk_bdev_io_get_buf() again to
do the data copy, otherwise for those backend devices such as NVMe with
hardware SGL support, 256 data segment is fine for them.
Change-Id: I96ebdf29829d86f9b38fab28a7406eedc9fa44ef
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/453604
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
2019-06-28 03:05:24 +00:00
|
|
|
while (to_last_block_bytes > 0 && iovcnt > 0) {
|
|
|
|
iov_len = spdk_min(to_last_block_bytes,
|
2019-07-01 04:43:05 +00:00
|
|
|
bdev_io->child_iov[child_iovpos].iov_len);
|
|
|
|
bdev_io->child_iov[child_iovpos].iov_len -= iov_len;
|
|
|
|
if (bdev_io->child_iov[child_iovpos].iov_len == 0) {
|
|
|
|
child_iovpos--;
|
bdev: rewind child offset to last block size aligned iov
Here is the an example to describe existing issue:
There is a Write request with 64KiB data length, and this IO is cross the IO
boundary. We assume that the parent IO will have 2 children requests, one is
33KiB length, the other one is 31KiB. Here is the view of parent iovs, the
first 33KiB length data has 33 iovs:
iov.[0].iov_length = 1024;
.
.
iov.[31].iov_length = 256;
iov.[32].iov_length = 768;
.
.
iov.[64].iov_length = 1024;
In function _spdk_bdev_io_split(), then you can see that for the 33KiB length
child request, exiting code will run out of child child_iov space and return
error due to last one data buffer is not block size aligned.
Here we can rewind the existing offset to last block size aligned buffer to
avoid the error case, for backend which need aligned data buffer such as
AIO backend, the request will go through spdk_bdev_io_get_buf() again to
do the data copy, otherwise for those backend devices such as NVMe with
hardware SGL support, 256 data segment is fine for them.
Change-Id: I96ebdf29829d86f9b38fab28a7406eedc9fa44ef
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/453604
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
2019-06-28 03:05:24 +00:00
|
|
|
iovcnt--;
|
|
|
|
}
|
|
|
|
to_last_block_bytes -= iov_len;
|
2018-10-04 07:44:55 +00:00
|
|
|
}
|
bdev: rewind child offset to last block size aligned iov
Here is the an example to describe existing issue:
There is a Write request with 64KiB data length, and this IO is cross the IO
boundary. We assume that the parent IO will have 2 children requests, one is
33KiB length, the other one is 31KiB. Here is the view of parent iovs, the
first 33KiB length data has 33 iovs:
iov.[0].iov_length = 1024;
.
.
iov.[31].iov_length = 256;
iov.[32].iov_length = 768;
.
.
iov.[64].iov_length = 1024;
In function _spdk_bdev_io_split(), then you can see that for the 33KiB length
child request, exiting code will run out of child child_iov space and return
error due to last one data buffer is not block size aligned.
Here we can rewind the existing offset to last block size aligned buffer to
avoid the error case, for backend which need aligned data buffer such as
AIO backend, the request will go through spdk_bdev_io_get_buf() again to
do the data copy, otherwise for those backend devices such as NVMe with
hardware SGL support, 256 data segment is fine for them.
Change-Id: I96ebdf29829d86f9b38fab28a7406eedc9fa44ef
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/453604
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
2019-06-28 03:05:24 +00:00
|
|
|
|
|
|
|
assert(to_last_block_bytes == 0);
|
2018-10-04 07:44:55 +00:00
|
|
|
}
|
|
|
|
to_next_boundary -= to_next_boundary_bytes / blocklen;
|
|
|
|
}
|
2018-08-17 18:04:04 +00:00
|
|
|
|
2018-10-04 07:44:55 +00:00
|
|
|
bdev_io->u.bdev.split_outstanding++;
|
2018-09-25 06:01:48 +00:00
|
|
|
|
2018-10-04 07:44:55 +00:00
|
|
|
if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
|
2019-04-16 08:12:09 +00:00
|
|
|
rc = _spdk_bdev_readv_blocks_with_md(bdev_io->internal.desc,
|
|
|
|
spdk_io_channel_from_ctx(bdev_io->internal.ch),
|
|
|
|
iov, iovcnt, md_buf, current_offset,
|
|
|
|
to_next_boundary,
|
|
|
|
_spdk_bdev_io_split_done, bdev_io);
|
2018-10-04 07:44:55 +00:00
|
|
|
} else {
|
2019-04-16 08:12:09 +00:00
|
|
|
rc = _spdk_bdev_writev_blocks_with_md(bdev_io->internal.desc,
|
|
|
|
spdk_io_channel_from_ctx(bdev_io->internal.ch),
|
|
|
|
iov, iovcnt, md_buf, current_offset,
|
|
|
|
to_next_boundary,
|
|
|
|
_spdk_bdev_io_split_done, bdev_io);
|
2018-08-17 18:04:04 +00:00
|
|
|
}
|
|
|
|
|
2018-10-04 07:44:55 +00:00
|
|
|
if (rc == 0) {
|
|
|
|
current_offset += to_next_boundary;
|
|
|
|
remaining -= to_next_boundary;
|
|
|
|
bdev_io->u.bdev.split_current_offset_blocks = current_offset;
|
|
|
|
bdev_io->u.bdev.split_remaining_num_blocks = remaining;
|
|
|
|
} else {
|
|
|
|
bdev_io->u.bdev.split_outstanding--;
|
|
|
|
if (rc == -ENOMEM) {
|
|
|
|
if (bdev_io->u.bdev.split_outstanding == 0) {
|
|
|
|
/* No I/O is outstanding. Hence we should wait here. */
|
|
|
|
_spdk_bdev_queue_io_wait_with_cb(bdev_io,
|
2019-06-11 02:21:10 +00:00
|
|
|
_spdk_bdev_io_split);
|
2018-10-04 07:44:55 +00:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
|
|
|
|
if (bdev_io->u.bdev.split_outstanding == 0) {
|
|
|
|
bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
|
|
|
|
}
|
|
|
|
}
|
2018-08-17 18:04:04 +00:00
|
|
|
|
2018-10-04 07:44:55 +00:00
|
|
|
return;
|
|
|
|
}
|
2018-08-17 18:04:04 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_spdk_bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_io *parent_io = cb_arg;
|
|
|
|
|
|
|
|
spdk_bdev_free_io(bdev_io);
|
|
|
|
|
|
|
|
if (!success) {
|
|
|
|
parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
|
2018-10-04 07:44:55 +00:00
|
|
|
}
|
|
|
|
parent_io->u.bdev.split_outstanding--;
|
|
|
|
if (parent_io->u.bdev.split_outstanding != 0) {
|
2018-08-17 18:04:04 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2018-10-04 07:44:55 +00:00
|
|
|
/*
|
2019-05-16 01:32:18 +00:00
|
|
|
* Parent I/O finishes when all blocks are consumed.
|
2018-10-04 07:44:55 +00:00
|
|
|
*/
|
2019-05-16 01:32:18 +00:00
|
|
|
if (parent_io->u.bdev.split_remaining_num_blocks == 0) {
|
2018-10-04 07:44:55 +00:00
|
|
|
parent_io->internal.cb(parent_io, parent_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS,
|
|
|
|
parent_io->internal.caller_ctx);
|
2018-08-17 18:04:04 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Continue with the splitting process. This function will complete the parent I/O if the
|
|
|
|
* splitting is done.
|
|
|
|
*/
|
2019-06-11 02:21:10 +00:00
|
|
|
_spdk_bdev_io_split(parent_io);
|
2018-08-17 18:04:04 +00:00
|
|
|
}
|
|
|
|
|
bdev: split requests first if the request has data buffer
There is one existing example usage case to describe the issue:
Users(e.g. Vhost-blk target with Windows Guest) call spdk_bdev_readv_blocks()
to submit a 128KiB length data READ request, and the data buffer
provides by vhost isn't aligned, but the backend block device
requires aligned data buffer, so existing function call trace:
spdk_bdev_readv_blocks()-->
spdk_bdev_io_submit()-->
spdk_bdev_io_get_buf()
spdk_bdev_io_get_buf() will allocate buffer from large data
buffer pool for 128KiB length, of course, it will return error
with existing logic.
So here, no matter what the data length is, we can go through
the split process first for both READ and WRITE.
However, there is one scenario that for iSCSI READ request,
the iSCSI layer will not allocate data buffer for the request,
so for this case if the IO boundary is required we should keep
the logic as before.
Change-Id: I67661f5fa4c3c7c561b45c86146759aa3477adbf
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/453133
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
2019-06-19 03:51:43 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_io_split_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
|
|
|
|
bool success);
|
|
|
|
|
2018-08-17 18:04:04 +00:00
|
|
|
static void
|
2019-06-11 02:21:10 +00:00
|
|
|
spdk_bdev_io_split(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
|
2018-08-17 18:04:04 +00:00
|
|
|
{
|
|
|
|
assert(_spdk_bdev_io_type_can_split(bdev_io->type));
|
|
|
|
|
|
|
|
bdev_io->u.bdev.split_current_offset_blocks = bdev_io->u.bdev.offset_blocks;
|
|
|
|
bdev_io->u.bdev.split_remaining_num_blocks = bdev_io->u.bdev.num_blocks;
|
2018-10-04 07:44:55 +00:00
|
|
|
bdev_io->u.bdev.split_outstanding = 0;
|
|
|
|
bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
|
2018-08-17 18:04:04 +00:00
|
|
|
|
bdev: split requests first if the request has data buffer
There is one existing example usage case to describe the issue:
Users(e.g. Vhost-blk target with Windows Guest) call spdk_bdev_readv_blocks()
to submit a 128KiB length data READ request, and the data buffer
provides by vhost isn't aligned, but the backend block device
requires aligned data buffer, so existing function call trace:
spdk_bdev_readv_blocks()-->
spdk_bdev_io_submit()-->
spdk_bdev_io_get_buf()
spdk_bdev_io_get_buf() will allocate buffer from large data
buffer pool for 128KiB length, of course, it will return error
with existing logic.
So here, no matter what the data length is, we can go through
the split process first for both READ and WRITE.
However, there is one scenario that for iSCSI READ request,
the iSCSI layer will not allocate data buffer for the request,
so for this case if the IO boundary is required we should keep
the logic as before.
Change-Id: I67661f5fa4c3c7c561b45c86146759aa3477adbf
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/453133
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
2019-06-19 03:51:43 +00:00
|
|
|
if (_is_buf_allocated(bdev_io->u.bdev.iovs)) {
|
|
|
|
_spdk_bdev_io_split(bdev_io);
|
|
|
|
} else {
|
|
|
|
assert(bdev_io->type == SPDK_BDEV_IO_TYPE_READ);
|
|
|
|
spdk_bdev_io_get_buf(bdev_io, _spdk_bdev_io_split_get_buf_cb,
|
|
|
|
bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
|
|
|
|
}
|
2018-08-17 18:04:04 +00:00
|
|
|
}
|
|
|
|
|
bdev: Not assert but pass completion status to spdk_bdev_io_get_buf_cb
When the specified buffer size to spdk_bdev_io_get_buf() is greater
than the permitted maximum, spdk_bdev_io_get_buf() asserts simply and
doesn't call the specified callback function.
SPDK SCSI library doesn't allocate read buffer and specifies
expected read buffer size, and expects that it is allocated by
spdk_bdev_io_get_buf().
Bdev perf tool also doesn't allocate read buffer and specifies
expected read buffer size, and expects that it is allocated by
spdk_bdev_io_get_buf().
When we support DIF insert and strip in iSCSI target, the read
buffer size iSCSI initiator requests and the read buffer size iSCSI target
requests will become different.
Even after that, iSCSI initiator and iSCSI target will negotiate correctly
not to cause buffer overflow in spdk_bdev_io_get_buf(), but if iSCSI
initiator ignores the result of negotiation, iSCSI initiator can request
read buffer size larger than the permitted maximum, and can cause
failure in iSCSI target. This is very fragile and should be avoided.
This patch does the following:
- Add the completion status of spdk_bdev_io_get_buf() to
spdk_bdev_io_get_buf_cb(),
- spdk_bdev_io_get_buf() calls spdk_bdev_io_get_buf_cb() by setting
success to false, and return.
- spdk_bdev_io_get_buf_cb() in each bdev module calls assert if success
is false.
Subsequent patches will process the case that success is false
in spdk_bdev_io_get_buf_cb().
Change-Id: I76429a86e18a69aa085a353ac94743296d270b82
Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-on: https://review.gerrithub.io/c/446045
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Ziye Yang <ziye.yang@intel.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
2019-02-25 00:34:28 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_io_split_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
|
|
|
|
bool success)
|
|
|
|
{
|
2019-02-25 01:43:13 +00:00
|
|
|
if (!success) {
|
|
|
|
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
|
|
|
|
return;
|
|
|
|
}
|
bdev: Not assert but pass completion status to spdk_bdev_io_get_buf_cb
When the specified buffer size to spdk_bdev_io_get_buf() is greater
than the permitted maximum, spdk_bdev_io_get_buf() asserts simply and
doesn't call the specified callback function.
SPDK SCSI library doesn't allocate read buffer and specifies
expected read buffer size, and expects that it is allocated by
spdk_bdev_io_get_buf().
Bdev perf tool also doesn't allocate read buffer and specifies
expected read buffer size, and expects that it is allocated by
spdk_bdev_io_get_buf().
When we support DIF insert and strip in iSCSI target, the read
buffer size iSCSI initiator requests and the read buffer size iSCSI target
requests will become different.
Even after that, iSCSI initiator and iSCSI target will negotiate correctly
not to cause buffer overflow in spdk_bdev_io_get_buf(), but if iSCSI
initiator ignores the result of negotiation, iSCSI initiator can request
read buffer size larger than the permitted maximum, and can cause
failure in iSCSI target. This is very flagile and should be avoided.
This patch do the following
- Add the completion status of spdk_bdev_io_get_buf() to
spdk_bdev_io_get_buf_cb(),
- spdk_bdev_io_get_buf() calls spdk_bdev_io_get_buf_cb() by setting
success to false, and return.
- spdk_bdev_io_get_buf_cb() in each bdev module calls assert if success
is false.
Subsequent patches will process the case that success is false
in spdk_bdev_io_get_buf_cb().
Change-Id: I76429a86e18a69aa085a353ac94743296d270b82
Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-on: https://review.gerrithub.io/c/446045
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Ziye Yang <ziye.yang@intel.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
2019-02-25 00:34:28 +00:00
|
|
|
|
2019-06-11 02:21:10 +00:00
|
|
|
spdk_bdev_io_split(ch, bdev_io);
|
bdev: Not assert but pass completion status to spdk_bdev_io_get_buf_cb
When the specified buffer size to spdk_bdev_io_get_buf() is greater
than the permitted maximum, spdk_bdev_io_get_buf() asserts simply and
doesn't call the specified callback function.
SPDK SCSI library doesn't allocate read buffer and specifies
expected read buffer size, and expects that it is allocated by
spdk_bdev_io_get_buf().
Bdev perf tool also doesn't allocate read buffer and specifies
expected read buffer size, and expects that it is allocated by
spdk_bdev_io_get_buf().
When we support DIF insert and strip in iSCSI target, the read
buffer size iSCSI initiator requests and the read buffer size iSCSI target
requests will become different.
Even after that, iSCSI initiator and iSCSI target will negotiate correctly
not to cause buffer overflow in spdk_bdev_io_get_buf(), but if iSCSI
initiator ignores the result of negotiation, iSCSI initiator can request
read buffer size larger than the permitted maximum, and can cause
failure in iSCSI target. This is very flagile and should be avoided.
This patch do the following
- Add the completion status of spdk_bdev_io_get_buf() to
spdk_bdev_io_get_buf_cb(),
- spdk_bdev_io_get_buf() calls spdk_bdev_io_get_buf_cb() by setting
success to false, and return.
- spdk_bdev_io_get_buf_cb() in each bdev module calls assert if success
is false.
Subsequent patches will process the case that success is false
in spdk_bdev_io_get_buf_cb().
Change-Id: I76429a86e18a69aa085a353ac94743296d270b82
Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-on: https://review.gerrithub.io/c/446045
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Ziye Yang <ziye.yang@intel.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
2019-02-25 00:34:28 +00:00
|
|
|
}
|
|
|
|
|
2019-02-04 17:34:09 +00:00
|
|
|
/* Explicitly mark this inline, since it's used as a function pointer and otherwise won't
|
|
|
|
* be inlined, at least on some compilers.
|
|
|
|
*/
|
|
|
|
static inline void
|
2017-12-28 03:11:55 +00:00
|
|
|
_spdk_bdev_io_submit(void *ctx)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2017-12-28 03:11:55 +00:00
|
|
|
struct spdk_bdev_io *bdev_io = ctx;
|
2017-09-07 21:42:50 +00:00
|
|
|
struct spdk_bdev *bdev = bdev_io->bdev;
|
2018-06-20 17:54:48 +00:00
|
|
|
struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch;
|
2018-05-04 08:10:52 +00:00
|
|
|
struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource;
|
2018-08-31 20:13:32 +00:00
|
|
|
uint64_t tsc;
|
2017-09-14 21:02:09 +00:00
|
|
|
|
2018-08-31 20:13:32 +00:00
|
|
|
tsc = spdk_get_ticks();
|
|
|
|
bdev_io->internal.submit_tsc = tsc;
|
|
|
|
spdk_trace_record_tsc(tsc, TRACE_BDEV_IO_START, 0, 0, (uintptr_t)bdev_io, bdev_io->type);
|
2019-05-21 13:18:56 +00:00
|
|
|
|
|
|
|
if (spdk_likely(bdev_ch->flags == 0)) {
|
|
|
|
_spdk_bdev_io_do_submit(bdev_ch, bdev_io);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2018-03-16 12:20:55 +00:00
|
|
|
bdev_ch->io_outstanding++;
|
2018-05-04 08:10:52 +00:00
|
|
|
shared_resource->io_outstanding++;
|
2018-06-19 23:27:19 +00:00
|
|
|
bdev_io->internal.in_submit_request = true;
|
2019-05-21 13:18:56 +00:00
|
|
|
if (bdev_ch->flags & BDEV_CH_RESET_IN_PROGRESS) {
|
2017-09-08 18:44:50 +00:00
|
|
|
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
|
2017-12-28 03:11:55 +00:00
|
|
|
} else if (bdev_ch->flags & BDEV_CH_QOS_ENABLED) {
|
2018-03-30 02:55:05 +00:00
|
|
|
bdev_ch->io_outstanding--;
|
2018-05-04 08:10:52 +00:00
|
|
|
shared_resource->io_outstanding--;
|
2018-06-21 20:03:02 +00:00
|
|
|
TAILQ_INSERT_TAIL(&bdev->internal.qos->queued, bdev_io, internal.link);
|
2018-10-09 05:10:33 +00:00
|
|
|
_spdk_bdev_qos_io_submit(bdev_ch, bdev->internal.qos);
|
2017-09-08 18:44:50 +00:00
|
|
|
} else {
|
|
|
|
SPDK_ERRLOG("unknown bdev_ch flag %x found\n", bdev_ch->flags);
|
|
|
|
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
|
|
|
|
}
|
2018-06-19 23:27:19 +00:00
|
|
|
bdev_io->internal.in_submit_request = false;
|
2017-09-14 21:02:09 +00:00
|
|
|
}
|
|
|
|
|
2017-12-28 03:11:55 +00:00
|
|
|
static void
|
|
|
|
spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev = bdev_io->bdev;
|
2019-05-31 05:49:38 +00:00
|
|
|
struct spdk_thread *thread = spdk_bdev_io_get_thread(bdev_io);
|
2017-12-28 03:11:55 +00:00
|
|
|
|
2018-07-10 13:01:08 +00:00
|
|
|
assert(thread != NULL);
|
2018-06-19 23:19:49 +00:00
|
|
|
assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_PENDING);
|
2017-12-28 03:11:55 +00:00
|
|
|
|
2018-08-30 15:06:54 +00:00
|
|
|
if (bdev->split_on_optimal_io_boundary && _spdk_bdev_io_should_split(bdev_io)) {
|
bdev: split requests first if the request has data buffer
There is one existing example usage case to describe the issue:
Users(e.g. Vhost-blk target with Windows Guest) call spdk_bdev_readv_blocks()
to submit a 128KiB length data READ request, and the data buffer
provides by vhost isn't aligned, but the backend block device
requires aligned data buffer, so existing function call trace:
spdk_bdev_readv_blocks()-->
spdk_bdev_io_submit()-->
spdk_bdev_io_get_buf()
spdk_bdev_io_get_buf() will allocate buffer from large data
buffer pool for 128KiB length, of course, it will return error
with existing logic.
So here, no matter what the data length is, we can go through
the split process first for both READ and WRITE.
However, there is one scenario that for iSCSI READ request,
the iSCSI layer will not allocate data buffer for the request,
so for this case if the IO boundary is required we should keep
the logic as before.
Change-Id: I67661f5fa4c3c7c561b45c86146759aa3477adbf
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/453133
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
2019-06-19 03:51:43 +00:00
|
|
|
spdk_bdev_io_split(NULL, bdev_io);
|
2018-08-17 18:04:04 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2018-06-20 17:54:48 +00:00
|
|
|
if (bdev_io->internal.ch->flags & BDEV_CH_QOS_ENABLED) {
|
2018-07-10 13:01:08 +00:00
|
|
|
if ((thread == bdev->internal.qos->thread) || !bdev->internal.qos->thread) {
|
2018-05-30 20:15:32 +00:00
|
|
|
_spdk_bdev_io_submit(bdev_io);
|
|
|
|
} else {
|
2018-06-20 17:54:48 +00:00
|
|
|
bdev_io->internal.io_submit_ch = bdev_io->internal.ch;
|
2018-06-21 20:03:02 +00:00
|
|
|
bdev_io->internal.ch = bdev->internal.qos->ch;
|
|
|
|
spdk_thread_send_msg(bdev->internal.qos->thread, _spdk_bdev_io_submit, bdev_io);
|
2018-05-30 20:15:32 +00:00
|
|
|
}
|
2017-12-28 03:11:55 +00:00
|
|
|
} else {
|
|
|
|
_spdk_bdev_io_submit(bdev_io);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-09-14 21:02:09 +00:00
|
|
|
static void
|
|
|
|
spdk_bdev_io_submit_reset(struct spdk_bdev_io *bdev_io)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev = bdev_io->bdev;
|
2018-06-20 17:54:48 +00:00
|
|
|
struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch;
|
2017-09-14 21:02:09 +00:00
|
|
|
struct spdk_io_channel *ch = bdev_ch->channel;
|
2017-05-04 20:18:03 +00:00
|
|
|
|
2018-06-19 23:19:49 +00:00
|
|
|
assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_PENDING);
|
2017-05-04 20:18:03 +00:00
|
|
|
|
2018-06-19 23:27:19 +00:00
|
|
|
bdev_io->internal.in_submit_request = true;
|
2017-05-04 20:18:03 +00:00
|
|
|
bdev->fn_table->submit_request(ch, bdev_io);
|
2018-06-19 23:27:19 +00:00
|
|
|
bdev_io->internal.in_submit_request = false;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
spdk_bdev_io_init(struct spdk_bdev_io *bdev_io,
|
|
|
|
struct spdk_bdev *bdev, void *cb_arg,
|
|
|
|
spdk_bdev_io_completion_cb cb)
|
|
|
|
{
|
|
|
|
bdev_io->bdev = bdev;
|
2018-06-19 23:57:20 +00:00
|
|
|
bdev_io->internal.caller_ctx = cb_arg;
|
|
|
|
bdev_io->internal.cb = cb;
|
2018-06-19 23:19:49 +00:00
|
|
|
bdev_io->internal.status = SPDK_BDEV_IO_STATUS_PENDING;
|
2018-06-19 23:27:19 +00:00
|
|
|
bdev_io->internal.in_submit_request = false;
|
2018-06-19 22:08:31 +00:00
|
|
|
bdev_io->internal.buf = NULL;
|
2018-06-20 17:54:48 +00:00
|
|
|
bdev_io->internal.io_submit_ch = NULL;
|
2018-10-12 07:46:14 +00:00
|
|
|
bdev_io->internal.orig_iovs = NULL;
|
|
|
|
bdev_io->internal.orig_iovcnt = 0;
|
2019-04-16 08:12:09 +00:00
|
|
|
bdev_io->internal.orig_md_buf = NULL;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2018-06-21 16:34:42 +00:00
|
|
|
static bool
|
|
|
|
_spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type)
|
|
|
|
{
|
|
|
|
return bdev->fn_table->io_type_supported(bdev->ctxt, io_type);
|
|
|
|
}
|
|
|
|
|
2016-08-24 17:25:49 +00:00
|
|
|
bool
|
|
|
|
spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type)
|
|
|
|
{
|
2018-06-21 16:34:42 +00:00
|
|
|
bool supported;
|
|
|
|
|
|
|
|
supported = _spdk_bdev_io_type_supported(bdev, io_type);
|
|
|
|
|
|
|
|
if (!supported) {
|
|
|
|
switch (io_type) {
|
|
|
|
case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
|
|
|
|
/* The bdev layer will emulate write zeroes as long as write is supported. */
|
|
|
|
supported = _spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE);
|
|
|
|
break;
|
2019-04-02 18:25:10 +00:00
|
|
|
case SPDK_BDEV_IO_TYPE_ZCOPY:
|
|
|
|
/* Zero copy can be emulated with regular read and write */
|
|
|
|
supported = _spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_READ) &&
|
|
|
|
_spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE);
|
|
|
|
break;
|
2018-06-21 16:34:42 +00:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return supported;
|
2016-08-24 17:25:49 +00:00
|
|
|
}
|
|
|
|
|
2016-11-18 17:22:58 +00:00
|
|
|
int
|
2018-02-22 12:48:13 +00:00
|
|
|
spdk_bdev_dump_info_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
|
2016-11-18 17:22:58 +00:00
|
|
|
{
|
2018-02-22 12:48:13 +00:00
|
|
|
if (bdev->fn_table->dump_info_json) {
|
|
|
|
return bdev->fn_table->dump_info_json(bdev->ctxt, w);
|
2016-11-18 17:22:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-12-28 03:11:55 +00:00
|
|
|
static void
|
2018-06-05 08:21:14 +00:00
|
|
|
spdk_bdev_qos_update_max_quota_per_timeslice(struct spdk_bdev_qos *qos)
|
2017-12-28 03:11:55 +00:00
|
|
|
{
|
2018-09-04 15:01:51 +00:00
|
|
|
uint32_t max_per_timeslice = 0;
|
|
|
|
int i;
|
2017-12-28 03:11:55 +00:00
|
|
|
|
2018-09-04 15:01:51 +00:00
|
|
|
for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
|
|
|
|
if (qos->rate_limits[i].limit == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
|
|
|
|
qos->rate_limits[i].max_per_timeslice = 0;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
max_per_timeslice = qos->rate_limits[i].limit *
|
|
|
|
SPDK_BDEV_QOS_TIMESLICE_IN_USEC / SPDK_SEC_TO_USEC;
|
|
|
|
|
|
|
|
qos->rate_limits[i].max_per_timeslice = spdk_max(max_per_timeslice,
|
|
|
|
qos->rate_limits[i].min_per_timeslice);
|
2018-06-05 08:21:14 +00:00
|
|
|
|
2018-09-04 15:01:51 +00:00
|
|
|
qos->rate_limits[i].remaining_this_timeslice = qos->rate_limits[i].max_per_timeslice;
|
2018-06-05 08:21:14 +00:00
|
|
|
}
|
2018-12-24 21:46:42 +00:00
|
|
|
|
|
|
|
_spdk_bdev_qos_set_ops(qos);
|
2017-12-28 03:11:55 +00:00
|
|
|
}
|
|
|
|
|
2018-03-13 00:16:47 +00:00
|
|
|
static int
|
2017-12-28 03:11:55 +00:00
|
|
|
spdk_bdev_channel_poll_qos(void *arg)
|
|
|
|
{
|
2018-04-24 22:44:14 +00:00
|
|
|
struct spdk_bdev_qos *qos = arg;
|
2018-08-27 19:17:31 +00:00
|
|
|
uint64_t now = spdk_get_ticks();
|
2018-09-04 15:01:51 +00:00
|
|
|
int i;
|
2017-12-28 03:11:55 +00:00
|
|
|
|
2018-08-27 19:17:31 +00:00
|
|
|
if (now < (qos->last_timeslice + qos->timeslice_size)) {
|
|
|
|
/* We received our callback earlier than expected - return
|
|
|
|
* immediately and wait to do accounting until at least one
|
|
|
|
* timeslice has actually expired. This should never happen
|
|
|
|
* with a well-behaved timer implementation.
|
|
|
|
*/
|
|
|
|
return 0;
|
|
|
|
}
|
2018-07-04 01:43:39 +00:00
|
|
|
|
2018-08-27 19:17:31 +00:00
|
|
|
/* Reset for next round of rate limiting */
|
2018-09-04 15:01:51 +00:00
|
|
|
for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
|
|
|
|
/* We may have allowed the IOs or bytes to slightly overrun in the last
|
|
|
|
* timeslice. remaining_this_timeslice is signed, so if it's negative
|
|
|
|
* here, we'll account for the overrun so that the next timeslice will
|
|
|
|
* be appropriately reduced.
|
|
|
|
*/
|
|
|
|
if (qos->rate_limits[i].remaining_this_timeslice > 0) {
|
|
|
|
qos->rate_limits[i].remaining_this_timeslice = 0;
|
|
|
|
}
|
2018-07-04 01:43:39 +00:00
|
|
|
}
|
2018-08-27 19:17:31 +00:00
|
|
|
|
|
|
|
while (now >= (qos->last_timeslice + qos->timeslice_size)) {
|
|
|
|
qos->last_timeslice += qos->timeslice_size;
|
2018-09-04 15:01:51 +00:00
|
|
|
for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
|
|
|
|
qos->rate_limits[i].remaining_this_timeslice +=
|
|
|
|
qos->rate_limits[i].max_per_timeslice;
|
|
|
|
}
|
2018-08-27 19:17:31 +00:00
|
|
|
}
|
2017-12-28 03:11:55 +00:00
|
|
|
|
2018-10-10 17:04:50 +00:00
|
|
|
return _spdk_bdev_qos_io_submit(qos->ch, qos);
|
2017-12-28 03:11:55 +00:00
|
|
|
}
|
|
|
|
|
2018-01-21 23:03:27 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_channel_destroy_resource(struct spdk_bdev_channel *ch)
|
|
|
|
{
|
2018-05-04 08:10:52 +00:00
|
|
|
struct spdk_bdev_shared_resource *shared_resource;
|
2018-05-04 07:58:01 +00:00
|
|
|
|
2019-01-02 12:16:12 +00:00
|
|
|
spdk_put_io_channel(ch->channel);
|
2018-01-21 23:03:27 +00:00
|
|
|
|
2019-01-02 12:16:12 +00:00
|
|
|
shared_resource = ch->shared_resource;
|
2018-01-21 23:03:27 +00:00
|
|
|
|
2018-05-04 07:58:01 +00:00
|
|
|
assert(ch->io_outstanding == 0);
|
2019-01-02 12:16:12 +00:00
|
|
|
assert(shared_resource->ref > 0);
|
|
|
|
shared_resource->ref--;
|
|
|
|
if (shared_resource->ref == 0) {
|
|
|
|
assert(shared_resource->io_outstanding == 0);
|
|
|
|
TAILQ_REMOVE(&shared_resource->mgmt_ch->shared_resources, shared_resource, link);
|
|
|
|
spdk_put_io_channel(spdk_io_channel_from_ctx(shared_resource->mgmt_ch));
|
|
|
|
free(shared_resource);
|
2018-01-21 23:03:27 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
/* Caller must hold bdev->internal.mutex. */
|
2018-07-12 06:35:05 +00:00
|
|
|
static void
|
2018-04-06 17:53:52 +00:00
|
|
|
_spdk_bdev_enable_qos(struct spdk_bdev *bdev, struct spdk_bdev_channel *ch)
|
2017-12-28 03:11:55 +00:00
|
|
|
{
|
2018-09-04 15:01:51 +00:00
|
|
|
struct spdk_bdev_qos *qos = bdev->internal.qos;
|
|
|
|
int i;
|
2017-12-28 03:11:55 +00:00
|
|
|
|
2018-04-06 17:53:52 +00:00
|
|
|
/* Rate limiting on this bdev enabled */
|
|
|
|
if (qos) {
|
|
|
|
if (qos->ch == NULL) {
|
|
|
|
struct spdk_io_channel *io_ch;
|
2017-12-28 03:11:55 +00:00
|
|
|
|
2018-04-06 17:53:52 +00:00
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Selecting channel %p as QoS channel for bdev %s on thread %p\n", ch,
|
|
|
|
bdev->name, spdk_get_thread());
|
2017-12-28 03:11:55 +00:00
|
|
|
|
2018-04-06 17:53:52 +00:00
|
|
|
/* No qos channel has been selected, so set one up */
|
2018-04-10 22:01:55 +00:00
|
|
|
|
2018-04-06 17:53:52 +00:00
|
|
|
/* Take another reference to ch */
|
|
|
|
io_ch = spdk_get_io_channel(__bdev_to_io_dev(bdev));
|
2018-11-22 08:39:40 +00:00
|
|
|
assert(io_ch != NULL);
|
2018-04-06 17:53:52 +00:00
|
|
|
qos->ch = ch;
|
2018-04-10 23:23:37 +00:00
|
|
|
|
2018-04-06 17:53:52 +00:00
|
|
|
qos->thread = spdk_io_channel_get_thread(io_ch);
|
2017-12-28 03:11:55 +00:00
|
|
|
|
2018-04-06 17:53:52 +00:00
|
|
|
TAILQ_INIT(&qos->queued);
|
2018-09-04 15:01:51 +00:00
|
|
|
|
|
|
|
for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
|
|
|
|
if (_spdk_bdev_qos_is_iops_rate_limit(i) == true) {
|
|
|
|
qos->rate_limits[i].min_per_timeslice =
|
|
|
|
SPDK_BDEV_QOS_MIN_IO_PER_TIMESLICE;
|
|
|
|
} else {
|
|
|
|
qos->rate_limits[i].min_per_timeslice =
|
|
|
|
SPDK_BDEV_QOS_MIN_BYTE_PER_TIMESLICE;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (qos->rate_limits[i].limit == 0) {
|
|
|
|
qos->rate_limits[i].limit = SPDK_BDEV_QOS_LIMIT_NOT_DEFINED;
|
|
|
|
}
|
|
|
|
}
|
2018-06-05 08:21:14 +00:00
|
|
|
spdk_bdev_qos_update_max_quota_per_timeslice(qos);
|
2018-08-27 19:17:31 +00:00
|
|
|
qos->timeslice_size =
|
|
|
|
SPDK_BDEV_QOS_TIMESLICE_IN_USEC * spdk_get_ticks_hz() / SPDK_SEC_TO_USEC;
|
|
|
|
qos->last_timeslice = spdk_get_ticks();
|
2018-04-06 17:53:52 +00:00
|
|
|
qos->poller = spdk_poller_register(spdk_bdev_channel_poll_qos,
|
|
|
|
qos,
|
|
|
|
SPDK_BDEV_QOS_TIMESLICE_IN_USEC);
|
2017-12-29 08:02:08 +00:00
|
|
|
}
|
2018-04-06 17:53:52 +00:00
|
|
|
|
2017-12-29 08:02:08 +00:00
|
|
|
ch->flags |= BDEV_CH_QOS_ENABLED;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-21 23:03:27 +00:00
|
|
|
static int
|
|
|
|
spdk_bdev_channel_create(void *io_device, void *ctx_buf)
|
|
|
|
{
|
2017-12-28 03:11:55 +00:00
|
|
|
struct spdk_bdev *bdev = __bdev_from_io_dev(io_device);
|
2018-01-21 23:03:27 +00:00
|
|
|
struct spdk_bdev_channel *ch = ctx_buf;
|
2018-04-06 17:53:52 +00:00
|
|
|
struct spdk_io_channel *mgmt_io_ch;
|
|
|
|
struct spdk_bdev_mgmt_channel *mgmt_ch;
|
|
|
|
struct spdk_bdev_shared_resource *shared_resource;
|
2018-01-21 23:03:27 +00:00
|
|
|
|
2018-04-06 17:53:52 +00:00
|
|
|
ch->bdev = bdev;
|
|
|
|
ch->channel = bdev->fn_table->get_io_channel(bdev->ctxt);
|
|
|
|
if (!ch->channel) {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2018-11-19 13:31:19 +00:00
|
|
|
assert(ch->histogram == NULL);
|
|
|
|
if (bdev->internal.histogram_enabled) {
|
|
|
|
ch->histogram = spdk_histogram_data_alloc();
|
|
|
|
if (ch->histogram == NULL) {
|
|
|
|
SPDK_ERRLOG("Could not allocate histogram\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-04-06 17:53:52 +00:00
|
|
|
mgmt_io_ch = spdk_get_io_channel(&g_bdev_mgr);
|
|
|
|
if (!mgmt_io_ch) {
|
2019-01-02 13:20:36 +00:00
|
|
|
spdk_put_io_channel(ch->channel);
|
2018-01-21 23:03:27 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2018-04-06 17:53:52 +00:00
|
|
|
mgmt_ch = spdk_io_channel_get_ctx(mgmt_io_ch);
|
|
|
|
TAILQ_FOREACH(shared_resource, &mgmt_ch->shared_resources, link) {
|
|
|
|
if (shared_resource->shared_ch == ch->channel) {
|
|
|
|
spdk_put_io_channel(mgmt_io_ch);
|
|
|
|
shared_resource->ref++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (shared_resource == NULL) {
|
|
|
|
shared_resource = calloc(1, sizeof(*shared_resource));
|
|
|
|
if (shared_resource == NULL) {
|
2019-01-02 13:20:36 +00:00
|
|
|
spdk_put_io_channel(ch->channel);
|
2018-04-06 17:53:52 +00:00
|
|
|
spdk_put_io_channel(mgmt_io_ch);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
shared_resource->mgmt_ch = mgmt_ch;
|
|
|
|
shared_resource->io_outstanding = 0;
|
|
|
|
TAILQ_INIT(&shared_resource->nomem_io);
|
|
|
|
shared_resource->nomem_threshold = 0;
|
|
|
|
shared_resource->shared_ch = ch->channel;
|
|
|
|
shared_resource->ref = 1;
|
|
|
|
TAILQ_INSERT_TAIL(&mgmt_ch->shared_resources, shared_resource, link);
|
|
|
|
}
|
|
|
|
|
|
|
|
memset(&ch->stat, 0, sizeof(ch->stat));
|
2018-05-23 18:30:01 +00:00
|
|
|
ch->stat.ticks_rate = spdk_get_ticks_hz();
|
2018-04-06 17:53:52 +00:00
|
|
|
ch->io_outstanding = 0;
|
|
|
|
TAILQ_INIT(&ch->queued_resets);
|
|
|
|
ch->flags = 0;
|
|
|
|
ch->shared_resource = shared_resource;
|
|
|
|
|
2017-05-30 08:45:46 +00:00
|
|
|
#ifdef SPDK_CONFIG_VTUNE
|
|
|
|
{
|
|
|
|
char *name;
|
2017-06-15 16:59:02 +00:00
|
|
|
__itt_init_ittlib(NULL, 0);
|
2017-05-30 08:45:46 +00:00
|
|
|
name = spdk_sprintf_alloc("spdk_bdev_%s_%p", ch->bdev->name, ch);
|
|
|
|
if (!name) {
|
2018-02-04 20:39:55 +00:00
|
|
|
_spdk_bdev_channel_destroy_resource(ch);
|
2017-05-30 08:45:46 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
ch->handle = __itt_string_handle_create(name);
|
|
|
|
free(name);
|
|
|
|
ch->start_tsc = spdk_get_ticks();
|
|
|
|
ch->interval_tsc = spdk_get_ticks_hz() / 100;
|
2018-05-23 18:30:01 +00:00
|
|
|
memset(&ch->prev_stat, 0, sizeof(ch->prev_stat));
|
2017-05-30 08:45:46 +00:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_lock(&bdev->internal.mutex);
|
2018-07-12 06:35:05 +00:00
|
|
|
_spdk_bdev_enable_qos(bdev, ch);
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
2018-03-09 21:46:47 +00:00
|
|
|
|
2017-04-04 21:01:54 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-09-12 19:14:48 +00:00
|
|
|
/*
|
|
|
|
* Abort I/O that are waiting on a data buffer. These types of I/O are
|
2018-05-31 17:06:30 +00:00
|
|
|
* linked using the spdk_bdev_io internal.buf_link TAILQ_ENTRY.
|
2017-09-12 19:14:48 +00:00
|
|
|
*/
|
2017-04-04 21:01:54 +00:00
|
|
|
static void
|
2018-01-05 21:55:38 +00:00
|
|
|
_spdk_bdev_abort_buf_io(bdev_io_stailq_t *queue, struct spdk_bdev_channel *ch)
|
2017-04-04 21:01:54 +00:00
|
|
|
{
|
2018-01-05 21:55:38 +00:00
|
|
|
bdev_io_stailq_t tmp;
|
|
|
|
struct spdk_bdev_io *bdev_io;
|
|
|
|
|
|
|
|
STAILQ_INIT(&tmp);
|
2017-05-11 16:48:07 +00:00
|
|
|
|
2018-01-05 21:55:38 +00:00
|
|
|
while (!STAILQ_EMPTY(queue)) {
|
|
|
|
bdev_io = STAILQ_FIRST(queue);
|
2018-05-31 17:06:30 +00:00
|
|
|
STAILQ_REMOVE_HEAD(queue, internal.buf_link);
|
2018-06-20 17:54:48 +00:00
|
|
|
if (bdev_io->internal.ch == ch) {
|
2017-05-11 16:48:07 +00:00
|
|
|
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
|
2018-01-05 21:55:38 +00:00
|
|
|
} else {
|
2018-05-31 17:06:30 +00:00
|
|
|
STAILQ_INSERT_TAIL(&tmp, bdev_io, internal.buf_link);
|
2017-05-11 16:48:07 +00:00
|
|
|
}
|
|
|
|
}
|
2018-01-05 21:55:38 +00:00
|
|
|
|
|
|
|
STAILQ_SWAP(&tmp, queue, spdk_bdev_io);
|
2017-05-25 20:11:33 +00:00
|
|
|
}
|
2017-05-11 16:48:07 +00:00
|
|
|
|
2017-09-12 19:14:48 +00:00
|
|
|
/*
|
|
|
|
* Abort I/O that are queued waiting for submission. These types of I/O are
|
|
|
|
* linked using the spdk_bdev_io link TAILQ_ENTRY.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
_spdk_bdev_abort_queued_io(bdev_io_tailq_t *queue, struct spdk_bdev_channel *ch)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_io *bdev_io, *tmp;
|
|
|
|
|
2018-06-20 20:58:59 +00:00
|
|
|
TAILQ_FOREACH_SAFE(bdev_io, queue, internal.link, tmp) {
|
2018-06-20 17:54:48 +00:00
|
|
|
if (bdev_io->internal.ch == ch) {
|
2018-06-20 20:58:59 +00:00
|
|
|
TAILQ_REMOVE(queue, bdev_io, internal.link);
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O oustanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
/*
|
|
|
|
* spdk_bdev_io_complete() assumes that the completed I/O had
|
|
|
|
* been submitted to the bdev module. Since in this case it
|
|
|
|
* hadn't, bump io_outstanding to account for the decrement
|
|
|
|
* that spdk_bdev_io_complete() will do.
|
|
|
|
*/
|
|
|
|
if (bdev_io->type != SPDK_BDEV_IO_TYPE_RESET) {
|
2018-03-16 12:20:55 +00:00
|
|
|
ch->io_outstanding++;
|
2018-05-04 08:10:52 +00:00
|
|
|
ch->shared_resource->io_outstanding++;
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O outstanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
}
|
2017-09-12 19:14:48 +00:00
|
|
|
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-12-28 03:11:55 +00:00
|
|
|
static void
|
2018-04-10 23:23:37 +00:00
|
|
|
spdk_bdev_qos_channel_destroy(void *cb_arg)
|
2017-12-28 03:11:55 +00:00
|
|
|
{
|
2018-04-24 22:44:14 +00:00
|
|
|
struct spdk_bdev_qos *qos = cb_arg;
|
2018-04-10 23:23:37 +00:00
|
|
|
|
2018-04-06 17:53:52 +00:00
|
|
|
spdk_put_io_channel(spdk_io_channel_from_ctx(qos->ch));
|
2018-04-24 22:44:14 +00:00
|
|
|
spdk_poller_unregister(&qos->poller);
|
2017-12-28 03:11:55 +00:00
|
|
|
|
2018-04-06 17:53:52 +00:00
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Free QoS %p.\n", qos);
|
|
|
|
|
2018-04-24 22:44:14 +00:00
|
|
|
free(qos);
|
2017-12-28 03:11:55 +00:00
|
|
|
}
|
|
|
|
|
2018-05-02 17:19:48 +00:00
|
|
|
static int
|
|
|
|
spdk_bdev_qos_destroy(struct spdk_bdev *bdev)
|
|
|
|
{
|
2018-09-04 15:01:51 +00:00
|
|
|
int i;
|
|
|
|
|
2018-05-02 17:19:48 +00:00
|
|
|
/*
|
|
|
|
* Cleanly shutting down the QoS poller is tricky, because
|
2018-04-06 17:53:52 +00:00
|
|
|
* during the asynchronous operation the user could open
|
|
|
|
* a new descriptor and create a new channel, spawning
|
|
|
|
* a new QoS poller.
|
2018-05-02 17:19:48 +00:00
|
|
|
*
|
|
|
|
* The strategy is to create a new QoS structure here and swap it
|
|
|
|
* in. The shutdown path then continues to refer to the old one
|
|
|
|
* until it completes and then releases it.
|
|
|
|
*/
|
|
|
|
struct spdk_bdev_qos *new_qos, *old_qos;
|
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
old_qos = bdev->internal.qos;
|
2018-05-02 17:19:48 +00:00
|
|
|
|
|
|
|
new_qos = calloc(1, sizeof(*new_qos));
|
|
|
|
if (!new_qos) {
|
|
|
|
SPDK_ERRLOG("Unable to allocate memory to shut down QoS.\n");
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Copy the old QoS data into the newly allocated structure */
|
|
|
|
memcpy(new_qos, old_qos, sizeof(*new_qos));
|
|
|
|
|
|
|
|
/* Zero out the key parts of the QoS structure */
|
|
|
|
new_qos->ch = NULL;
|
|
|
|
new_qos->thread = NULL;
|
|
|
|
new_qos->poller = NULL;
|
|
|
|
TAILQ_INIT(&new_qos->queued);
|
2018-09-04 15:01:51 +00:00
|
|
|
/*
|
|
|
|
* The limit member of spdk_bdev_qos_limit structure is not zeroed.
|
|
|
|
* It will be used later for the new QoS structure.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
|
|
|
|
new_qos->rate_limits[i].remaining_this_timeslice = 0;
|
|
|
|
new_qos->rate_limits[i].min_per_timeslice = 0;
|
|
|
|
new_qos->rate_limits[i].max_per_timeslice = 0;
|
|
|
|
}
|
2018-05-02 17:19:48 +00:00
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
bdev->internal.qos = new_qos;
|
2018-05-02 17:19:48 +00:00
|
|
|
|
2018-07-10 13:01:08 +00:00
|
|
|
if (old_qos->thread == NULL) {
|
|
|
|
free(old_qos);
|
|
|
|
} else {
|
|
|
|
spdk_thread_send_msg(old_qos->thread, spdk_bdev_qos_channel_destroy,
|
|
|
|
old_qos);
|
|
|
|
}
|
2018-05-02 17:19:48 +00:00
|
|
|
|
|
|
|
/* It is safe to continue with destroying the bdev even though the QoS channel hasn't
|
|
|
|
* been destroyed yet. The destruction path will end up waiting for the final
|
|
|
|
* channel to be put before it releases resources. */
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-07-31 23:50:16 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_io_stat_add(struct spdk_bdev_io_stat *total, struct spdk_bdev_io_stat *add)
|
|
|
|
{
|
|
|
|
total->bytes_read += add->bytes_read;
|
|
|
|
total->num_read_ops += add->num_read_ops;
|
|
|
|
total->bytes_written += add->bytes_written;
|
|
|
|
total->num_write_ops += add->num_write_ops;
|
2019-01-04 02:45:53 +00:00
|
|
|
total->bytes_unmapped += add->bytes_unmapped;
|
|
|
|
total->num_unmap_ops += add->num_unmap_ops;
|
2018-07-31 23:50:16 +00:00
|
|
|
total->read_latency_ticks += add->read_latency_ticks;
|
|
|
|
total->write_latency_ticks += add->write_latency_ticks;
|
2019-01-04 02:45:53 +00:00
|
|
|
total->unmap_latency_ticks += add->unmap_latency_ticks;
|
2018-07-31 23:50:16 +00:00
|
|
|
}
|
|
|
|
|
2018-01-21 23:03:27 +00:00
|
|
|
static void
|
|
|
|
spdk_bdev_channel_destroy(void *io_device, void *ctx_buf)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_channel *ch = ctx_buf;
|
2018-04-06 17:53:52 +00:00
|
|
|
struct spdk_bdev_mgmt_channel *mgmt_ch;
|
|
|
|
struct spdk_bdev_shared_resource *shared_resource = ch->shared_resource;
|
2018-01-21 23:03:27 +00:00
|
|
|
|
2018-04-06 17:53:52 +00:00
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Destroying channel %p for bdev %s on thread %p\n", ch, ch->bdev->name,
|
|
|
|
spdk_get_thread());
|
2017-12-28 03:11:55 +00:00
|
|
|
|
2018-08-01 18:50:38 +00:00
|
|
|
/* This channel is going away, so add its statistics into the bdev so that they don't get lost. */
|
|
|
|
pthread_mutex_lock(&ch->bdev->internal.mutex);
|
|
|
|
_spdk_bdev_io_stat_add(&ch->bdev->internal.stat, &ch->stat);
|
|
|
|
pthread_mutex_unlock(&ch->bdev->internal.mutex);
|
|
|
|
|
2018-04-06 17:53:52 +00:00
|
|
|
mgmt_ch = shared_resource->mgmt_ch;
|
|
|
|
|
|
|
|
_spdk_bdev_abort_queued_io(&ch->queued_resets, ch);
|
|
|
|
_spdk_bdev_abort_queued_io(&shared_resource->nomem_io, ch);
|
|
|
|
_spdk_bdev_abort_buf_io(&mgmt_ch->need_buf_small, ch);
|
|
|
|
_spdk_bdev_abort_buf_io(&mgmt_ch->need_buf_large, ch);
|
|
|
|
|
2018-11-19 13:31:19 +00:00
|
|
|
if (ch->histogram) {
|
|
|
|
spdk_histogram_data_free(ch->histogram);
|
|
|
|
}
|
|
|
|
|
2018-04-06 17:53:52 +00:00
|
|
|
_spdk_bdev_channel_destroy_resource(ch);
|
2018-01-21 23:03:27 +00:00
|
|
|
}
|
|
|
|
|
2017-11-29 15:13:17 +00:00
|
|
|
int
|
|
|
|
spdk_bdev_alias_add(struct spdk_bdev *bdev, const char *alias)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_alias *tmp;
|
|
|
|
|
|
|
|
if (alias == NULL) {
|
|
|
|
SPDK_ERRLOG("Empty alias passed\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (spdk_bdev_get_by_name(alias)) {
|
|
|
|
SPDK_ERRLOG("Bdev name/alias: %s already exists\n", alias);
|
|
|
|
return -EEXIST;
|
|
|
|
}
|
|
|
|
|
|
|
|
tmp = calloc(1, sizeof(*tmp));
|
|
|
|
if (tmp == NULL) {
|
|
|
|
SPDK_ERRLOG("Unable to allocate alias\n");
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
tmp->alias = strdup(alias);
|
|
|
|
if (tmp->alias == NULL) {
|
|
|
|
free(tmp);
|
|
|
|
SPDK_ERRLOG("Unable to allocate alias\n");
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
TAILQ_INSERT_TAIL(&bdev->aliases, tmp, tailq);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
spdk_bdev_alias_del(struct spdk_bdev *bdev, const char *alias)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_alias *tmp;
|
|
|
|
|
|
|
|
TAILQ_FOREACH(tmp, &bdev->aliases, tailq) {
|
|
|
|
if (strcmp(alias, tmp->alias) == 0) {
|
|
|
|
TAILQ_REMOVE(&bdev->aliases, tmp, tailq);
|
|
|
|
free(tmp->alias);
|
|
|
|
free(tmp);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
SPDK_INFOLOG(SPDK_LOG_BDEV, "Alias %s does not exists\n", alias);
|
|
|
|
|
|
|
|
return -ENOENT;
|
|
|
|
}
|
|
|
|
|
2018-08-14 08:39:27 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_alias_del_all(struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_alias *p, *tmp;
|
|
|
|
|
|
|
|
TAILQ_FOREACH_SAFE(p, &bdev->aliases, tailq, tmp) {
|
|
|
|
TAILQ_REMOVE(&bdev->aliases, p, tailq);
|
|
|
|
free(p->alias);
|
|
|
|
free(p);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-16 19:53:32 +00:00
|
|
|
/*
 * Get an I/O channel for the bdev that 'desc' refers to.
 */
struct spdk_io_channel *
spdk_bdev_get_io_channel(struct spdk_bdev_desc *desc)
{
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);

	return spdk_get_io_channel(__bdev_to_io_dev(bdev));
}
|
|
|
|
|
2017-05-10 20:29:31 +00:00
|
|
|
const char *
|
|
|
|
spdk_bdev_get_name(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return bdev->name;
|
|
|
|
}
|
|
|
|
|
|
|
|
const char *
|
|
|
|
spdk_bdev_get_product_name(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return bdev->product_name;
|
|
|
|
}
|
|
|
|
|
2017-12-07 10:40:18 +00:00
|
|
|
const struct spdk_bdev_aliases_list *
|
|
|
|
spdk_bdev_get_aliases(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return &bdev->aliases;
|
|
|
|
}
|
|
|
|
|
2017-05-12 17:29:00 +00:00
|
|
|
uint32_t
|
|
|
|
spdk_bdev_get_block_size(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return bdev->blocklen;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t
|
|
|
|
spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return bdev->blockcnt;
|
|
|
|
}
|
|
|
|
|
2018-09-04 15:01:51 +00:00
|
|
|
const char *
|
|
|
|
spdk_bdev_get_qos_rpc_type(enum spdk_bdev_qos_rate_limit_type type)
|
|
|
|
{
|
|
|
|
return qos_rpc_type[type];
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
spdk_bdev_get_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits)
|
2017-12-29 21:45:15 +00:00
|
|
|
{
|
2018-09-04 15:01:51 +00:00
|
|
|
int i;
|
|
|
|
|
|
|
|
memset(limits, 0, sizeof(*limits) * SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES);
|
2018-04-24 22:44:14 +00:00
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_lock(&bdev->internal.mutex);
|
|
|
|
if (bdev->internal.qos) {
|
2018-09-04 15:01:51 +00:00
|
|
|
for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
|
|
|
|
if (bdev->internal.qos->rate_limits[i].limit !=
|
|
|
|
SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
|
|
|
|
limits[i] = bdev->internal.qos->rate_limits[i].limit;
|
2018-06-22 02:15:02 +00:00
|
|
|
if (_spdk_bdev_qos_is_iops_rate_limit(i) == false) {
|
|
|
|
/* Change from Byte to Megabyte which is user visible. */
|
|
|
|
limits[i] = limits[i] / 1024 / 1024;
|
|
|
|
}
|
2018-09-04 15:01:51 +00:00
|
|
|
}
|
|
|
|
}
|
2018-04-24 22:44:14 +00:00
|
|
|
}
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
2017-12-29 21:45:15 +00:00
|
|
|
}
|
|
|
|
|
2017-05-09 17:57:32 +00:00
|
|
|
size_t
|
|
|
|
spdk_bdev_get_buf_align(const struct spdk_bdev *bdev)
|
|
|
|
{
|
2018-10-24 07:02:31 +00:00
|
|
|
return 1 << bdev->required_alignment;
|
2017-05-09 17:57:32 +00:00
|
|
|
}
|
|
|
|
|
2017-08-16 20:56:10 +00:00
|
|
|
uint32_t
|
|
|
|
spdk_bdev_get_optimal_io_boundary(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return bdev->optimal_io_boundary;
|
|
|
|
}
|
|
|
|
|
2017-05-16 19:57:33 +00:00
|
|
|
bool
|
|
|
|
spdk_bdev_has_write_cache(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return bdev->write_cache;
|
|
|
|
}
|
|
|
|
|
2018-03-02 01:27:44 +00:00
|
|
|
const struct spdk_uuid *
|
|
|
|
spdk_bdev_get_uuid(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return &bdev->uuid;
|
|
|
|
}
|
|
|
|
|
2019-02-04 08:21:20 +00:00
|
|
|
uint32_t
|
|
|
|
spdk_bdev_get_md_size(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return bdev->md_len;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
|
|
|
spdk_bdev_is_md_interleaved(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return (bdev->md_len != 0) && bdev->md_interleave;
|
|
|
|
}
|
|
|
|
|
2019-04-16 08:12:09 +00:00
|
|
|
bool
|
|
|
|
spdk_bdev_is_md_separate(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return (bdev->md_len != 0) && !bdev->md_interleave;
|
|
|
|
}
|
|
|
|
|
2019-03-03 22:28:23 +00:00
|
|
|
uint32_t
|
|
|
|
spdk_bdev_get_data_block_size(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
if (spdk_bdev_is_md_interleaved(bdev)) {
|
|
|
|
return bdev->blocklen - bdev->md_len;
|
|
|
|
} else {
|
|
|
|
return bdev->blocklen;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-06-11 12:31:15 +00:00
|
|
|
static uint32_t
|
|
|
|
_bdev_get_block_size_with_md(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
if (!spdk_bdev_is_md_interleaved(bdev)) {
|
|
|
|
return bdev->blocklen + bdev->md_len;
|
|
|
|
} else {
|
|
|
|
return bdev->blocklen;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-05 00:50:56 +00:00
|
|
|
enum spdk_dif_type spdk_bdev_get_dif_type(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
if (bdev->md_len != 0) {
|
|
|
|
return bdev->dif_type;
|
|
|
|
} else {
|
|
|
|
return SPDK_DIF_DISABLE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
|
|
|
spdk_bdev_is_dif_head_of_md(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
if (spdk_bdev_get_dif_type(bdev) != SPDK_DIF_DISABLE) {
|
|
|
|
return bdev->dif_is_head_of_md;
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-07 08:41:06 +00:00
|
|
|
bool
|
|
|
|
spdk_bdev_is_dif_check_enabled(const struct spdk_bdev *bdev,
|
|
|
|
enum spdk_dif_check_type check_type)
|
|
|
|
{
|
|
|
|
if (spdk_bdev_get_dif_type(bdev) == SPDK_DIF_DISABLE) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (check_type) {
|
|
|
|
case SPDK_DIF_CHECK_TYPE_REFTAG:
|
|
|
|
return (bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK) != 0;
|
|
|
|
case SPDK_DIF_CHECK_TYPE_APPTAG:
|
|
|
|
return (bdev->dif_check_flags & SPDK_DIF_FLAGS_APPTAG_CHECK) != 0;
|
|
|
|
case SPDK_DIF_CHECK_TYPE_GUARD:
|
|
|
|
return (bdev->dif_check_flags & SPDK_DIF_FLAGS_GUARD_CHECK) != 0;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-07-05 17:44:30 +00:00
|
|
|
uint64_t
|
|
|
|
spdk_bdev_get_qd(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return bdev->internal.measured_queue_depth;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t
|
|
|
|
spdk_bdev_get_qd_sampling_period(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return bdev->internal.period;
|
|
|
|
}
|
|
|
|
|
2018-07-11 22:06:17 +00:00
|
|
|
uint64_t
|
|
|
|
spdk_bdev_get_weighted_io_time(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return bdev->internal.weighted_io_time;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t
|
|
|
|
spdk_bdev_get_io_time(const struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
return bdev->internal.io_time;
|
|
|
|
}
|
|
|
|
|
2018-07-05 17:44:30 +00:00
|
|
|
static void
|
|
|
|
_calculate_measured_qd_cpl(struct spdk_io_channel_iter *i, int status)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev = spdk_io_channel_iter_get_ctx(i);
|
|
|
|
|
|
|
|
bdev->internal.measured_queue_depth = bdev->internal.temporary_queue_depth;
|
2018-07-11 22:06:17 +00:00
|
|
|
|
|
|
|
if (bdev->internal.measured_queue_depth) {
|
2018-08-21 21:04:39 +00:00
|
|
|
bdev->internal.io_time += bdev->internal.period;
|
2018-07-11 22:06:17 +00:00
|
|
|
bdev->internal.weighted_io_time += bdev->internal.period * bdev->internal.measured_queue_depth;
|
|
|
|
}
|
2018-07-05 17:44:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_calculate_measured_qd(struct spdk_io_channel_iter *i)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev = spdk_io_channel_iter_get_ctx(i);
|
|
|
|
struct spdk_io_channel *io_ch = spdk_io_channel_iter_get_channel(i);
|
|
|
|
struct spdk_bdev_channel *ch = spdk_io_channel_get_ctx(io_ch);
|
|
|
|
|
|
|
|
bdev->internal.temporary_queue_depth += ch->io_outstanding;
|
|
|
|
spdk_for_each_channel_continue(i, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
spdk_bdev_calculate_measured_queue_depth(void *ctx)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev = ctx;
|
|
|
|
bdev->internal.temporary_queue_depth = 0;
|
|
|
|
spdk_for_each_channel(__bdev_to_io_dev(bdev), _calculate_measured_qd, bdev,
|
|
|
|
_calculate_measured_qd_cpl);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
spdk_bdev_set_qd_sampling_period(struct spdk_bdev *bdev, uint64_t period)
|
|
|
|
{
|
|
|
|
bdev->internal.period = period;
|
|
|
|
|
|
|
|
if (bdev->internal.qd_poller != NULL) {
|
|
|
|
spdk_poller_unregister(&bdev->internal.qd_poller);
|
|
|
|
bdev->internal.measured_queue_depth = UINT64_MAX;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (period != 0) {
|
|
|
|
bdev->internal.qd_poller = spdk_poller_register(spdk_bdev_calculate_measured_queue_depth, bdev,
|
|
|
|
period);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-26 10:00:36 +00:00
|
|
|
int
|
|
|
|
spdk_bdev_notify_blockcnt_change(struct spdk_bdev *bdev, uint64_t size)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_lock(&bdev->internal.mutex);
|
2018-01-26 10:00:36 +00:00
|
|
|
|
|
|
|
/* bdev has open descriptors */
|
2018-06-21 20:03:02 +00:00
|
|
|
if (!TAILQ_EMPTY(&bdev->internal.open_descs) &&
|
2018-01-26 10:00:36 +00:00
|
|
|
bdev->blockcnt > size) {
|
|
|
|
ret = -EBUSY;
|
|
|
|
} else {
|
|
|
|
bdev->blockcnt = size;
|
|
|
|
ret = 0;
|
|
|
|
}
|
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
2018-01-26 10:00:36 +00:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
/*
|
|
|
|
* Convert I/O offset and length from bytes to blocks.
|
|
|
|
*
|
|
|
|
* Returns zero on success or non-zero if the byte parameters aren't divisible by the block size.
|
|
|
|
*/
|
|
|
|
static uint64_t
|
|
|
|
spdk_bdev_bytes_to_blocks(struct spdk_bdev *bdev, uint64_t offset_bytes, uint64_t *offset_blocks,
|
|
|
|
uint64_t num_bytes, uint64_t *num_blocks)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2017-08-28 21:55:35 +00:00
|
|
|
uint32_t block_size = bdev->blocklen;
|
2018-12-28 03:12:55 +00:00
|
|
|
uint8_t shift_cnt;
|
|
|
|
|
|
|
|
/* Avoid expensive div operations if possible. These spdk_u32 functions are very cheap. */
|
|
|
|
if (spdk_likely(spdk_u32_is_pow2(block_size))) {
|
|
|
|
shift_cnt = spdk_u32log2(block_size);
|
|
|
|
*offset_blocks = offset_bytes >> shift_cnt;
|
|
|
|
*num_blocks = num_bytes >> shift_cnt;
|
|
|
|
return (offset_bytes - (*offset_blocks << shift_cnt)) |
|
|
|
|
(num_bytes - (*num_blocks << shift_cnt));
|
|
|
|
} else {
|
|
|
|
*offset_blocks = offset_bytes / block_size;
|
|
|
|
*num_blocks = num_bytes / block_size;
|
|
|
|
return (offset_bytes % block_size) | (num_bytes % block_size);
|
|
|
|
}
|
2017-08-28 21:55:35 +00:00
|
|
|
}
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
static bool
|
|
|
|
spdk_bdev_io_valid_blocks(struct spdk_bdev *bdev, uint64_t offset_blocks, uint64_t num_blocks)
|
|
|
|
{
|
|
|
|
/* Return failure if offset_blocks + num_blocks is less than offset_blocks; indicates there
|
2016-07-20 18:16:23 +00:00
|
|
|
* has been an overflow and hence the offset has been wrapped around */
|
2017-08-28 21:55:35 +00:00
|
|
|
if (offset_blocks + num_blocks < offset_blocks) {
|
2017-08-04 21:15:46 +00:00
|
|
|
return false;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
/* Return failure if offset_blocks + num_blocks exceeds the size of the bdev */
|
|
|
|
if (offset_blocks + num_blocks > bdev->blockcnt) {
|
2017-08-04 21:15:46 +00:00
|
|
|
return false;
|
2016-10-18 13:45:01 +00:00
|
|
|
}
|
|
|
|
|
2017-08-04 21:15:46 +00:00
|
|
|
return true;
|
2016-10-18 13:45:01 +00:00
|
|
|
}
|
|
|
|
|
2019-04-16 08:12:09 +00:00
|
|
|
/*
 * Return true when the result of _is_buf_allocated(iovs) agrees with whether
 * a metadata buffer was supplied, i.e. both are present or both are absent.
 */
static bool
_bdev_io_check_md_buf(const struct iovec *iovs, const void *md_buf)
{
	const bool md_present = (md_buf != NULL);

	return _is_buf_allocated(iovs) == md_present;
}
|
|
|
|
|
2019-04-16 08:12:09 +00:00
|
|
|
static int
|
|
|
|
_spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, void *buf,
|
|
|
|
void *md_buf, int64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2016-10-18 13:45:01 +00:00
|
|
|
{
|
2019-06-05 08:57:19 +00:00
|
|
|
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
|
2016-10-18 13:45:01 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
2017-04-04 21:01:54 +00:00
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
2016-10-18 13:45:01 +00:00
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
|
2017-06-05 18:39:38 +00:00
|
|
|
return -EINVAL;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2018-04-06 20:59:07 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel);
|
2016-07-20 18:16:23 +00:00
|
|
|
if (!bdev_io) {
|
2017-06-05 18:39:38 +00:00
|
|
|
return -ENOMEM;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2018-06-20 17:54:48 +00:00
|
|
|
bdev_io->internal.ch = channel;
|
2018-08-17 22:06:26 +00:00
|
|
|
bdev_io->internal.desc = desc;
|
2016-07-20 18:16:23 +00:00
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
|
2018-06-25 18:03:11 +00:00
|
|
|
bdev_io->u.bdev.iovs = &bdev_io->iov;
|
|
|
|
bdev_io->u.bdev.iovs[0].iov_base = buf;
|
|
|
|
bdev_io->u.bdev.iovs[0].iov_len = num_blocks * bdev->blocklen;
|
2017-09-20 13:10:17 +00:00
|
|
|
bdev_io->u.bdev.iovcnt = 1;
|
2019-04-16 08:12:09 +00:00
|
|
|
bdev_io->u.bdev.md_buf = md_buf;
|
2017-09-20 13:10:17 +00:00
|
|
|
bdev_io->u.bdev.num_blocks = num_blocks;
|
|
|
|
bdev_io->u.bdev.offset_blocks = offset_blocks;
|
2016-10-04 14:39:27 +00:00
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
2017-09-07 21:51:14 +00:00
|
|
|
spdk_bdev_io_submit(bdev_io);
|
2017-06-05 18:39:38 +00:00
|
|
|
return 0;
|
2016-10-04 14:39:27 +00:00
|
|
|
}
|
|
|
|
|
2019-04-16 08:12:09 +00:00
|
|
|
int
|
|
|
|
spdk_bdev_read(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
void *buf, uint64_t offset, uint64_t nbytes,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
|
|
|
{
|
|
|
|
uint64_t offset_blocks, num_blocks;
|
|
|
|
|
2019-06-05 08:57:19 +00:00
|
|
|
if (spdk_bdev_bytes_to_blocks(spdk_bdev_desc_get_bdev(desc), offset, &offset_blocks,
|
|
|
|
nbytes, &num_blocks) != 0) {
|
2019-04-16 08:12:09 +00:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return spdk_bdev_read_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
void *buf, uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
|
|
|
{
|
|
|
|
return _spdk_bdev_read_blocks_with_md(desc, ch, buf, NULL, offset_blocks, num_blocks,
|
|
|
|
cb, cb_arg);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
void *buf, void *md_buf, int64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
|
|
|
{
|
|
|
|
struct iovec iov = {
|
|
|
|
.iov_base = buf,
|
|
|
|
};
|
|
|
|
|
2019-06-05 08:57:19 +00:00
|
|
|
if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
|
2019-04-16 08:12:09 +00:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!_bdev_io_check_md_buf(&iov, md_buf)) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return _spdk_bdev_read_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks,
|
|
|
|
cb, cb_arg);
|
|
|
|
}
|
|
|
|
|
2017-06-05 18:39:38 +00:00
|
|
|
int
|
2017-07-06 19:39:19 +00:00
|
|
|
spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
2016-10-04 14:39:27 +00:00
|
|
|
struct iovec *iov, int iovcnt,
|
|
|
|
uint64_t offset, uint64_t nbytes,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2017-08-28 21:55:35 +00:00
|
|
|
{
|
|
|
|
uint64_t offset_blocks, num_blocks;
|
|
|
|
|
2019-06-05 08:57:19 +00:00
|
|
|
if (spdk_bdev_bytes_to_blocks(spdk_bdev_desc_get_bdev(desc), offset, &offset_blocks,
|
|
|
|
nbytes, &num_blocks) != 0) {
|
2017-08-28 21:55:35 +00:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return spdk_bdev_readv_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg);
|
|
|
|
}
|
|
|
|
|
2019-04-16 08:12:09 +00:00
|
|
|
static int
|
|
|
|
_spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks,
|
|
|
|
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2016-10-04 14:39:27 +00:00
|
|
|
{
|
2019-06-05 08:57:19 +00:00
|
|
|
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
|
2016-10-04 14:39:27 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
2017-04-04 21:01:54 +00:00
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
2016-10-04 14:39:27 +00:00
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
|
2017-06-05 18:39:38 +00:00
|
|
|
return -EINVAL;
|
2016-10-04 14:39:27 +00:00
|
|
|
}
|
|
|
|
|
2018-04-06 20:59:07 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel);
|
2016-10-04 14:39:27 +00:00
|
|
|
if (!bdev_io) {
|
2017-06-05 18:39:38 +00:00
|
|
|
return -ENOMEM;
|
2016-10-04 14:39:27 +00:00
|
|
|
}
|
|
|
|
|
2018-06-20 17:54:48 +00:00
|
|
|
bdev_io->internal.ch = channel;
|
2018-08-17 22:06:26 +00:00
|
|
|
bdev_io->internal.desc = desc;
|
2016-10-04 14:39:27 +00:00
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
|
2017-09-20 13:10:17 +00:00
|
|
|
bdev_io->u.bdev.iovs = iov;
|
|
|
|
bdev_io->u.bdev.iovcnt = iovcnt;
|
2019-04-16 08:12:09 +00:00
|
|
|
bdev_io->u.bdev.md_buf = md_buf;
|
2017-09-20 13:10:17 +00:00
|
|
|
bdev_io->u.bdev.num_blocks = num_blocks;
|
|
|
|
bdev_io->u.bdev.offset_blocks = offset_blocks;
|
2016-07-20 18:16:23 +00:00
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
2017-09-07 21:51:14 +00:00
|
|
|
spdk_bdev_io_submit(bdev_io);
|
2017-06-05 18:39:38 +00:00
|
|
|
return 0;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2019-04-16 08:12:09 +00:00
|
|
|
int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
struct iovec *iov, int iovcnt,
|
|
|
|
uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
|
|
|
{
|
|
|
|
return _spdk_bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
|
|
|
|
num_blocks, cb, cb_arg);
|
|
|
|
}
|
|
|
|
|
2017-06-05 18:39:38 +00:00
|
|
|
int
|
2019-04-16 08:12:09 +00:00
|
|
|
spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
struct iovec *iov, int iovcnt, void *md_buf,
|
|
|
|
uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2017-08-28 21:55:35 +00:00
|
|
|
{
|
2019-06-05 08:57:19 +00:00
|
|
|
if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
|
2019-04-16 08:12:09 +00:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2017-08-28 21:55:35 +00:00
|
|
|
|
2019-04-16 08:12:09 +00:00
|
|
|
if (!_bdev_io_check_md_buf(iov, md_buf)) {
|
2017-08-28 21:55:35 +00:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2019-04-16 08:12:09 +00:00
|
|
|
return _spdk_bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
|
|
|
|
num_blocks, cb, cb_arg);
|
2017-08-28 21:55:35 +00:00
|
|
|
}
|
|
|
|
|
2019-04-16 08:12:09 +00:00
|
|
|
static int
|
|
|
|
_spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
void *buf, void *md_buf, uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2019-06-05 08:57:19 +00:00
|
|
|
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
|
2016-07-20 18:16:23 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
2017-04-04 21:01:54 +00:00
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-07-06 19:39:19 +00:00
|
|
|
if (!desc->write) {
|
|
|
|
return -EBADF;
|
|
|
|
}
|
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
|
2017-06-05 18:39:38 +00:00
|
|
|
return -EINVAL;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2018-04-06 20:59:07 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel);
|
2016-07-20 18:16:23 +00:00
|
|
|
if (!bdev_io) {
|
2017-06-05 18:39:38 +00:00
|
|
|
return -ENOMEM;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2018-06-20 17:54:48 +00:00
|
|
|
bdev_io->internal.ch = channel;
|
2018-08-17 22:06:26 +00:00
|
|
|
bdev_io->internal.desc = desc;
|
2016-07-20 18:16:23 +00:00
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
|
2018-06-25 18:03:11 +00:00
|
|
|
bdev_io->u.bdev.iovs = &bdev_io->iov;
|
|
|
|
bdev_io->u.bdev.iovs[0].iov_base = buf;
|
|
|
|
bdev_io->u.bdev.iovs[0].iov_len = num_blocks * bdev->blocklen;
|
2017-09-20 13:10:17 +00:00
|
|
|
bdev_io->u.bdev.iovcnt = 1;
|
2019-04-16 08:12:09 +00:00
|
|
|
bdev_io->u.bdev.md_buf = md_buf;
|
2017-09-20 13:10:17 +00:00
|
|
|
bdev_io->u.bdev.num_blocks = num_blocks;
|
|
|
|
bdev_io->u.bdev.offset_blocks = offset_blocks;
|
2016-07-20 18:16:23 +00:00
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
2017-09-07 21:51:14 +00:00
|
|
|
spdk_bdev_io_submit(bdev_io);
|
2017-06-05 18:39:38 +00:00
|
|
|
return 0;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-06-05 18:39:38 +00:00
|
|
|
int
|
2019-04-16 08:12:09 +00:00
|
|
|
spdk_bdev_write(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
void *buf, uint64_t offset, uint64_t nbytes,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2017-08-28 21:55:35 +00:00
|
|
|
{
|
|
|
|
uint64_t offset_blocks, num_blocks;
|
|
|
|
|
2019-06-05 08:57:19 +00:00
|
|
|
if (spdk_bdev_bytes_to_blocks(spdk_bdev_desc_get_bdev(desc), offset, &offset_blocks,
|
|
|
|
nbytes, &num_blocks) != 0) {
|
2017-08-28 21:55:35 +00:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2019-04-16 08:12:09 +00:00
|
|
|
return spdk_bdev_write_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg);
|
2017-08-28 21:55:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2019-04-16 08:12:09 +00:00
|
|
|
spdk_bdev_write_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
void *buf, uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
|
|
|
{
|
|
|
|
return _spdk_bdev_write_blocks_with_md(desc, ch, buf, NULL, offset_blocks, num_blocks,
|
|
|
|
cb, cb_arg);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
void *buf, void *md_buf, uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
|
|
|
{
|
|
|
|
struct iovec iov = {
|
|
|
|
.iov_base = buf,
|
|
|
|
};
|
|
|
|
|
2019-06-05 08:57:19 +00:00
|
|
|
if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
|
2019-04-16 08:12:09 +00:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!_bdev_io_check_md_buf(&iov, md_buf)) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return _spdk_bdev_write_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks,
|
|
|
|
cb, cb_arg);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
_spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
struct iovec *iov, int iovcnt, void *md_buf,
|
|
|
|
uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2019-06-05 08:57:19 +00:00
|
|
|
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
|
2016-07-20 18:16:23 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
2017-04-04 21:01:54 +00:00
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-07-06 19:39:19 +00:00
|
|
|
if (!desc->write) {
|
|
|
|
return -EBADF;
|
|
|
|
}
|
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
|
2017-06-05 18:39:38 +00:00
|
|
|
return -EINVAL;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2018-04-06 20:59:07 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel);
|
2016-07-20 18:16:23 +00:00
|
|
|
if (!bdev_io) {
|
2017-06-05 18:39:38 +00:00
|
|
|
return -ENOMEM;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2018-06-20 17:54:48 +00:00
|
|
|
bdev_io->internal.ch = channel;
|
2018-08-17 22:06:26 +00:00
|
|
|
bdev_io->internal.desc = desc;
|
2016-07-20 18:16:23 +00:00
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
|
2017-09-20 13:10:17 +00:00
|
|
|
bdev_io->u.bdev.iovs = iov;
|
|
|
|
bdev_io->u.bdev.iovcnt = iovcnt;
|
2019-04-16 08:12:09 +00:00
|
|
|
bdev_io->u.bdev.md_buf = md_buf;
|
2017-09-20 13:10:17 +00:00
|
|
|
bdev_io->u.bdev.num_blocks = num_blocks;
|
|
|
|
bdev_io->u.bdev.offset_blocks = offset_blocks;
|
2016-07-20 18:16:23 +00:00
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
2017-09-07 21:51:14 +00:00
|
|
|
spdk_bdev_io_submit(bdev_io);
|
2017-06-05 18:39:38 +00:00
|
|
|
return 0;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2019-04-16 08:12:09 +00:00
|
|
|
int
|
|
|
|
spdk_bdev_writev(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
struct iovec *iov, int iovcnt,
|
|
|
|
uint64_t offset, uint64_t len,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
|
|
|
{
|
|
|
|
uint64_t offset_blocks, num_blocks;
|
|
|
|
|
2019-06-05 08:57:19 +00:00
|
|
|
if (spdk_bdev_bytes_to_blocks(spdk_bdev_desc_get_bdev(desc), offset, &offset_blocks,
|
|
|
|
len, &num_blocks) != 0) {
|
2019-04-16 08:12:09 +00:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return spdk_bdev_writev_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
struct iovec *iov, int iovcnt,
|
|
|
|
uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
|
|
|
{
|
|
|
|
return _spdk_bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
|
|
|
|
num_blocks, cb, cb_arg);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
struct iovec *iov, int iovcnt, void *md_buf,
|
|
|
|
uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
|
|
|
{
|
2019-06-05 08:57:19 +00:00
|
|
|
if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
|
2019-04-16 08:12:09 +00:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!_bdev_io_check_md_buf(iov, md_buf)) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return _spdk_bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
|
|
|
|
num_blocks, cb, cb_arg);
|
|
|
|
}
|
|
|
|
|
2019-04-02 18:25:10 +00:00
|
|
|
static void
|
|
|
|
bdev_zcopy_get_buf(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
|
|
|
|
{
|
|
|
|
if (!success) {
|
|
|
|
/* Don't use spdk_bdev_io_complete here - this bdev_io was never actually submitted. */
|
|
|
|
bdev_io->internal.status = SPDK_BDEV_IO_STATUS_NOMEM;
|
|
|
|
bdev_io->internal.cb(bdev_io, success, bdev_io->internal.caller_ctx);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bdev_io->u.bdev.zcopy.populate) {
|
|
|
|
/* Read the real data into the buffer */
|
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
|
|
|
|
bdev_io->internal.status = SPDK_BDEV_IO_STATUS_PENDING;
|
|
|
|
spdk_bdev_io_submit(bdev_io);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Don't use spdk_bdev_io_complete here - this bdev_io was never actually submitted. */
|
|
|
|
bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
|
|
|
|
bdev_io->internal.cb(bdev_io, success, bdev_io->internal.caller_ctx);
|
|
|
|
}
|
|
|
|
|
2017-11-07 22:05:19 +00:00
|
|
|
int
|
|
|
|
spdk_bdev_zcopy_start(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
bool populate,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
|
|
|
{
|
2019-06-05 08:57:19 +00:00
|
|
|
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
|
2017-11-07 22:05:19 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
|
|
|
|
|
|
|
if (!desc->write) {
|
|
|
|
return -EBADF;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZCOPY)) {
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
|
|
|
bdev_io = spdk_bdev_get_io(channel);
|
|
|
|
if (!bdev_io) {
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
bdev_io->internal.ch = channel;
|
|
|
|
bdev_io->internal.desc = desc;
|
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_ZCOPY;
|
|
|
|
bdev_io->u.bdev.num_blocks = num_blocks;
|
|
|
|
bdev_io->u.bdev.offset_blocks = offset_blocks;
|
2019-04-02 18:25:10 +00:00
|
|
|
bdev_io->u.bdev.iovs = NULL;
|
|
|
|
bdev_io->u.bdev.iovcnt = 0;
|
2017-11-07 22:05:19 +00:00
|
|
|
bdev_io->u.bdev.zcopy.populate = populate ? 1 : 0;
|
|
|
|
bdev_io->u.bdev.zcopy.commit = 0;
|
|
|
|
bdev_io->u.bdev.zcopy.start = 1;
|
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
2019-04-02 18:25:10 +00:00
|
|
|
if (_spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZCOPY)) {
|
|
|
|
spdk_bdev_io_submit(bdev_io);
|
|
|
|
} else {
|
|
|
|
/* Emulate zcopy by allocating a buffer */
|
|
|
|
spdk_bdev_io_get_buf(bdev_io, bdev_zcopy_get_buf,
|
|
|
|
bdev_io->u.bdev.num_blocks * bdev->blocklen);
|
|
|
|
}
|
|
|
|
|
2017-11-07 22:05:19 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
spdk_bdev_zcopy_end(struct spdk_bdev_io *bdev_io, bool commit,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
|
|
|
{
|
2019-04-02 18:25:10 +00:00
|
|
|
struct spdk_bdev *bdev = bdev_io->bdev;
|
|
|
|
|
|
|
|
if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
|
|
|
|
/* This can happen if the zcopy was emulated in start */
|
|
|
|
if (bdev_io->u.bdev.zcopy.start != 1) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_ZCOPY;
|
|
|
|
}
|
|
|
|
|
2017-11-07 22:05:19 +00:00
|
|
|
if (bdev_io->type != SPDK_BDEV_IO_TYPE_ZCOPY) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
bdev_io->u.bdev.zcopy.commit = commit ? 1 : 0;
|
|
|
|
bdev_io->u.bdev.zcopy.start = 0;
|
|
|
|
bdev_io->internal.caller_ctx = cb_arg;
|
|
|
|
bdev_io->internal.cb = cb;
|
|
|
|
bdev_io->internal.status = SPDK_BDEV_IO_STATUS_PENDING;
|
|
|
|
|
2019-04-02 18:25:10 +00:00
|
|
|
if (_spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZCOPY)) {
|
|
|
|
spdk_bdev_io_submit(bdev_io);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!bdev_io->u.bdev.zcopy.commit) {
|
|
|
|
/* Don't use spdk_bdev_io_complete here - this bdev_io was never actually submitted. */
|
|
|
|
bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
|
|
|
|
bdev_io->internal.cb(bdev_io, true, bdev_io->internal.caller_ctx);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
|
2017-11-07 22:05:19 +00:00
|
|
|
spdk_bdev_io_submit(bdev_io);
|
2019-04-02 18:25:10 +00:00
|
|
|
|
2017-11-07 22:05:19 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-08-01 18:28:29 +00:00
|
|
|
int
|
|
|
|
spdk_bdev_write_zeroes(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
uint64_t offset, uint64_t len,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2017-08-28 21:55:35 +00:00
|
|
|
{
|
|
|
|
uint64_t offset_blocks, num_blocks;
|
|
|
|
|
2019-06-05 08:57:19 +00:00
|
|
|
if (spdk_bdev_bytes_to_blocks(spdk_bdev_desc_get_bdev(desc), offset, &offset_blocks,
|
|
|
|
len, &num_blocks) != 0) {
|
2017-08-28 21:55:35 +00:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return spdk_bdev_write_zeroes_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
spdk_bdev_write_zeroes_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2017-08-01 18:28:29 +00:00
|
|
|
{
|
2019-06-05 08:57:19 +00:00
|
|
|
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
|
2017-08-01 18:28:29 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
2017-07-28 22:34:24 +00:00
|
|
|
|
2018-06-12 16:02:00 +00:00
|
|
|
if (!desc->write) {
|
|
|
|
return -EBADF;
|
|
|
|
}
|
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
|
2017-08-01 18:28:29 +00:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2019-05-16 00:49:57 +00:00
|
|
|
if (!_spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES) &&
|
|
|
|
!_spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE)) {
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
2018-04-06 20:59:07 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel);
|
2017-07-28 22:34:24 +00:00
|
|
|
|
2017-08-01 18:28:29 +00:00
|
|
|
if (!bdev_io) {
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
2018-08-23 22:08:17 +00:00
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE_ZEROES;
|
2018-06-20 17:54:48 +00:00
|
|
|
bdev_io->internal.ch = channel;
|
2018-08-17 22:06:26 +00:00
|
|
|
bdev_io->internal.desc = desc;
|
2017-09-20 13:10:17 +00:00
|
|
|
bdev_io->u.bdev.offset_blocks = offset_blocks;
|
2018-08-23 22:08:17 +00:00
|
|
|
bdev_io->u.bdev.num_blocks = num_blocks;
|
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
2017-08-01 18:28:29 +00:00
|
|
|
|
2018-06-21 16:34:42 +00:00
|
|
|
if (_spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
|
2018-08-23 22:08:17 +00:00
|
|
|
spdk_bdev_io_submit(bdev_io);
|
|
|
|
return 0;
|
2017-07-28 22:34:24 +00:00
|
|
|
}
|
2019-05-16 00:49:57 +00:00
|
|
|
|
|
|
|
assert(_spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE));
|
2019-06-11 12:31:15 +00:00
|
|
|
assert(_bdev_get_block_size_with_md(bdev) <= ZERO_BUFFER_SIZE);
|
2019-05-16 00:49:57 +00:00
|
|
|
bdev_io->u.bdev.split_remaining_num_blocks = num_blocks;
|
|
|
|
bdev_io->u.bdev.split_current_offset_blocks = offset_blocks;
|
|
|
|
_spdk_bdev_write_zero_buffer_next(bdev_io);
|
|
|
|
|
|
|
|
return 0;
|
2017-08-01 18:28:29 +00:00
|
|
|
}
|
|
|
|
|
2017-06-05 18:39:38 +00:00
|
|
|
int
|
2017-07-06 19:39:19 +00:00
|
|
|
spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
2017-07-19 21:32:04 +00:00
|
|
|
uint64_t offset, uint64_t nbytes,
|
2016-07-20 18:16:23 +00:00
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2017-08-28 21:55:35 +00:00
|
|
|
{
|
|
|
|
uint64_t offset_blocks, num_blocks;
|
|
|
|
|
2019-06-05 08:57:19 +00:00
|
|
|
if (spdk_bdev_bytes_to_blocks(spdk_bdev_desc_get_bdev(desc), offset, &offset_blocks,
|
|
|
|
nbytes, &num_blocks) != 0) {
|
2017-08-28 21:55:35 +00:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return spdk_bdev_unmap_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
spdk_bdev_unmap_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2019-06-05 08:57:19 +00:00
|
|
|
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
|
2016-07-20 18:16:23 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
2017-04-04 21:01:54 +00:00
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-07-06 19:39:19 +00:00
|
|
|
if (!desc->write) {
|
|
|
|
return -EBADF;
|
|
|
|
}
|
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
|
2017-06-05 18:39:38 +00:00
|
|
|
return -EINVAL;
|
2016-10-05 20:57:49 +00:00
|
|
|
}
|
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
if (num_blocks == 0) {
|
2017-07-19 21:32:04 +00:00
|
|
|
SPDK_ERRLOG("Can't unmap 0 bytes\n");
|
2017-06-05 18:39:38 +00:00
|
|
|
return -EINVAL;
|
2016-10-05 20:57:49 +00:00
|
|
|
}
|
|
|
|
|
2018-04-06 20:59:07 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel);
|
2016-07-20 18:16:23 +00:00
|
|
|
if (!bdev_io) {
|
2017-06-05 18:39:38 +00:00
|
|
|
return -ENOMEM;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2018-06-20 17:54:48 +00:00
|
|
|
bdev_io->internal.ch = channel;
|
2018-08-17 22:06:26 +00:00
|
|
|
bdev_io->internal.desc = desc;
|
2016-07-20 18:16:23 +00:00
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_UNMAP;
|
2018-06-25 18:03:11 +00:00
|
|
|
|
|
|
|
bdev_io->u.bdev.iovs = &bdev_io->iov;
|
|
|
|
bdev_io->u.bdev.iovs[0].iov_base = NULL;
|
|
|
|
bdev_io->u.bdev.iovs[0].iov_len = 0;
|
2017-09-08 13:07:10 +00:00
|
|
|
bdev_io->u.bdev.iovcnt = 1;
|
2018-06-25 18:03:11 +00:00
|
|
|
|
2017-09-20 13:10:17 +00:00
|
|
|
bdev_io->u.bdev.offset_blocks = offset_blocks;
|
|
|
|
bdev_io->u.bdev.num_blocks = num_blocks;
|
2016-07-20 18:16:23 +00:00
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
2017-09-07 21:51:14 +00:00
|
|
|
spdk_bdev_io_submit(bdev_io);
|
2017-06-05 18:39:38 +00:00
|
|
|
return 0;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-06-05 18:39:38 +00:00
|
|
|
int
|
2017-07-06 19:39:19 +00:00
|
|
|
spdk_bdev_flush(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
2016-07-20 18:16:23 +00:00
|
|
|
uint64_t offset, uint64_t length,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2017-08-28 21:55:35 +00:00
|
|
|
{
|
|
|
|
uint64_t offset_blocks, num_blocks;
|
|
|
|
|
2019-06-05 08:57:19 +00:00
|
|
|
if (spdk_bdev_bytes_to_blocks(spdk_bdev_desc_get_bdev(desc), offset, &offset_blocks,
|
|
|
|
length, &num_blocks) != 0) {
|
2017-08-28 21:55:35 +00:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return spdk_bdev_flush_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
spdk_bdev_flush_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
uint64_t offset_blocks, uint64_t num_blocks,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2019-06-05 08:57:19 +00:00
|
|
|
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
|
2016-07-20 18:16:23 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
2017-04-04 21:01:54 +00:00
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-07-06 19:39:19 +00:00
|
|
|
if (!desc->write) {
|
|
|
|
return -EBADF;
|
|
|
|
}
|
|
|
|
|
2017-08-28 21:55:35 +00:00
|
|
|
if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-04-06 20:59:07 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel);
|
2016-07-20 18:16:23 +00:00
|
|
|
if (!bdev_io) {
|
2017-06-05 18:39:38 +00:00
|
|
|
return -ENOMEM;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2018-06-20 17:54:48 +00:00
|
|
|
bdev_io->internal.ch = channel;
|
2018-08-17 22:06:26 +00:00
|
|
|
bdev_io->internal.desc = desc;
|
2016-07-20 18:16:23 +00:00
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_FLUSH;
|
2017-09-08 13:07:10 +00:00
|
|
|
bdev_io->u.bdev.iovs = NULL;
|
|
|
|
bdev_io->u.bdev.iovcnt = 0;
|
2017-09-20 13:10:17 +00:00
|
|
|
bdev_io->u.bdev.offset_blocks = offset_blocks;
|
|
|
|
bdev_io->u.bdev.num_blocks = num_blocks;
|
2016-07-20 18:16:23 +00:00
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
2017-09-07 21:51:14 +00:00
|
|
|
spdk_bdev_io_submit(bdev_io);
|
2017-06-05 18:39:38 +00:00
|
|
|
return 0;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-05-25 20:11:33 +00:00
|
|
|
static void
|
2017-12-11 22:14:19 +00:00
|
|
|
_spdk_bdev_reset_dev(struct spdk_io_channel_iter *i, int status)
|
2017-05-25 20:11:33 +00:00
|
|
|
{
|
2017-12-11 22:14:19 +00:00
|
|
|
struct spdk_bdev_channel *ch = spdk_io_channel_iter_get_ctx(i);
|
2017-09-12 16:34:55 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
2017-05-25 20:11:33 +00:00
|
|
|
|
2017-09-12 16:34:55 +00:00
|
|
|
bdev_io = TAILQ_FIRST(&ch->queued_resets);
|
2018-06-20 20:58:59 +00:00
|
|
|
TAILQ_REMOVE(&ch->queued_resets, bdev_io, internal.link);
|
2017-09-14 21:02:09 +00:00
|
|
|
spdk_bdev_io_submit_reset(bdev_io);
|
2017-05-25 20:11:33 +00:00
|
|
|
}
|
|
|
|
|
2017-12-11 22:14:19 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_reset_freeze_channel(struct spdk_io_channel_iter *i)
|
2017-05-25 20:11:33 +00:00
|
|
|
{
|
2018-03-02 19:49:36 +00:00
|
|
|
struct spdk_io_channel *ch;
|
2017-05-25 20:11:33 +00:00
|
|
|
struct spdk_bdev_channel *channel;
|
2017-05-10 21:42:45 +00:00
|
|
|
struct spdk_bdev_mgmt_channel *mgmt_channel;
|
2018-05-04 08:10:52 +00:00
|
|
|
struct spdk_bdev_shared_resource *shared_resource;
|
2018-04-06 17:53:52 +00:00
|
|
|
bdev_io_tailq_t tmp_queued;
|
|
|
|
|
|
|
|
TAILQ_INIT(&tmp_queued);
|
2017-05-25 20:11:33 +00:00
|
|
|
|
2017-12-11 22:14:19 +00:00
|
|
|
ch = spdk_io_channel_iter_get_channel(i);
|
2017-05-25 20:11:33 +00:00
|
|
|
channel = spdk_io_channel_get_ctx(ch);
|
2018-05-04 08:10:52 +00:00
|
|
|
shared_resource = channel->shared_resource;
|
|
|
|
mgmt_channel = shared_resource->mgmt_ch;
|
2017-05-25 20:11:33 +00:00
|
|
|
|
2017-09-08 18:44:50 +00:00
|
|
|
channel->flags |= BDEV_CH_RESET_IN_PROGRESS;
|
|
|
|
|
2018-04-06 17:53:52 +00:00
|
|
|
if ((channel->flags & BDEV_CH_QOS_ENABLED) != 0) {
|
|
|
|
/* The QoS object is always valid and readable while
|
|
|
|
* the channel flag is set, so the lock here should not
|
|
|
|
* be necessary. We're not in the fast path though, so
|
|
|
|
* just take it anyway. */
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_lock(&channel->bdev->internal.mutex);
|
|
|
|
if (channel->bdev->internal.qos->ch == channel) {
|
|
|
|
TAILQ_SWAP(&channel->bdev->internal.qos->queued, &tmp_queued, spdk_bdev_io, internal.link);
|
2018-04-06 17:53:52 +00:00
|
|
|
}
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_unlock(&channel->bdev->internal.mutex);
|
2018-04-06 17:53:52 +00:00
|
|
|
}
|
|
|
|
|
2018-05-04 08:10:52 +00:00
|
|
|
_spdk_bdev_abort_queued_io(&shared_resource->nomem_io, channel);
|
2017-09-12 19:14:48 +00:00
|
|
|
_spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_small, channel);
|
|
|
|
_spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_large, channel);
|
2018-04-06 17:53:52 +00:00
|
|
|
_spdk_bdev_abort_queued_io(&tmp_queued, channel);
|
2017-11-16 07:42:37 +00:00
|
|
|
|
2017-12-11 22:14:19 +00:00
|
|
|
spdk_for_each_channel_continue(i, 0);
|
2017-05-25 20:11:33 +00:00
|
|
|
}
|
|
|
|
|
2017-06-15 14:17:12 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_start_reset(void *ctx)
|
|
|
|
{
|
2017-09-12 16:34:55 +00:00
|
|
|
struct spdk_bdev_channel *ch = ctx;
|
2017-06-15 14:17:12 +00:00
|
|
|
|
2018-02-19 22:21:15 +00:00
|
|
|
spdk_for_each_channel(__bdev_to_io_dev(ch->bdev), _spdk_bdev_reset_freeze_channel,
|
2017-09-12 16:34:55 +00:00
|
|
|
ch, _spdk_bdev_reset_dev);
|
2017-06-15 14:17:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2017-09-08 19:34:49 +00:00
|
|
|
_spdk_bdev_channel_start_reset(struct spdk_bdev_channel *ch)
|
2017-06-15 14:17:12 +00:00
|
|
|
{
|
2017-09-08 19:34:49 +00:00
|
|
|
struct spdk_bdev *bdev = ch->bdev;
|
2017-06-15 14:17:12 +00:00
|
|
|
|
2017-09-08 19:34:49 +00:00
|
|
|
assert(!TAILQ_EMPTY(&ch->queued_resets));
|
2017-06-15 14:17:12 +00:00
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_lock(&bdev->internal.mutex);
|
|
|
|
if (bdev->internal.reset_in_progress == NULL) {
|
|
|
|
bdev->internal.reset_in_progress = TAILQ_FIRST(&ch->queued_resets);
|
2017-09-12 16:34:55 +00:00
|
|
|
/*
|
|
|
|
* Take a channel reference for the target bdev for the life of this
|
|
|
|
* reset. This guards against the channel getting destroyed while
|
|
|
|
* spdk_for_each_channel() calls related to this reset IO are in
|
|
|
|
* progress. We will release the reference when this reset is
|
|
|
|
* completed.
|
|
|
|
*/
|
2018-06-21 20:03:02 +00:00
|
|
|
bdev->internal.reset_in_progress->u.reset.ch_ref = spdk_get_io_channel(__bdev_to_io_dev(bdev));
|
2017-09-12 16:34:55 +00:00
|
|
|
_spdk_bdev_start_reset(ch);
|
2017-06-15 14:17:12 +00:00
|
|
|
}
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
2017-06-15 14:17:12 +00:00
|
|
|
}
|
|
|
|
|
2016-07-20 18:16:23 +00:00
|
|
|
int
|
2017-07-06 19:39:19 +00:00
|
|
|
spdk_bdev_reset(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
2016-07-20 18:16:23 +00:00
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
|
|
|
{
|
2019-06-05 08:57:19 +00:00
|
|
|
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
|
2016-07-20 18:16:23 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
2017-05-23 17:51:50 +00:00
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2018-04-06 20:59:07 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel);
|
2016-07-20 18:16:23 +00:00
|
|
|
if (!bdev_io) {
|
2017-09-26 21:45:22 +00:00
|
|
|
return -ENOMEM;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2018-06-20 17:54:48 +00:00
|
|
|
bdev_io->internal.ch = channel;
|
2018-08-17 22:06:26 +00:00
|
|
|
bdev_io->internal.desc = desc;
|
2016-07-20 18:16:23 +00:00
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_RESET;
|
2017-09-12 16:34:55 +00:00
|
|
|
bdev_io->u.reset.ch_ref = NULL;
|
2016-07-20 18:16:23 +00:00
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_lock(&bdev->internal.mutex);
|
2018-06-20 20:58:59 +00:00
|
|
|
TAILQ_INSERT_TAIL(&channel->queued_resets, bdev_io, internal.link);
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
2017-06-15 14:17:12 +00:00
|
|
|
|
2017-09-08 19:34:49 +00:00
|
|
|
_spdk_bdev_channel_start_reset(channel);
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-05-25 20:11:33 +00:00
|
|
|
return 0;
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-04-06 21:40:29 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_get_io_stat(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
|
|
|
|
struct spdk_bdev_io_stat *stat)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
|
|
|
|
|
|
|
*stat = channel->stat;
|
|
|
|
}
|
|
|
|
|
2017-12-28 09:03:17 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_get_device_stat_done(struct spdk_io_channel_iter *i, int status)
|
|
|
|
{
|
|
|
|
void *io_device = spdk_io_channel_iter_get_io_device(i);
|
|
|
|
struct spdk_bdev_iostat_ctx *bdev_iostat_ctx = spdk_io_channel_iter_get_ctx(i);
|
|
|
|
|
|
|
|
bdev_iostat_ctx->cb(__bdev_from_io_dev(io_device), bdev_iostat_ctx->stat,
|
|
|
|
bdev_iostat_ctx->cb_arg, 0);
|
|
|
|
free(bdev_iostat_ctx);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_spdk_bdev_get_each_channel_stat(struct spdk_io_channel_iter *i)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_iostat_ctx *bdev_iostat_ctx = spdk_io_channel_iter_get_ctx(i);
|
|
|
|
struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
|
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
|
|
|
|
2018-07-31 23:50:16 +00:00
|
|
|
_spdk_bdev_io_stat_add(bdev_iostat_ctx->stat, &channel->stat);
|
2017-12-28 09:03:17 +00:00
|
|
|
spdk_for_each_channel_continue(i, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
spdk_bdev_get_device_stat(struct spdk_bdev *bdev, struct spdk_bdev_io_stat *stat,
|
|
|
|
spdk_bdev_get_device_stat_cb cb, void *cb_arg)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_iostat_ctx *bdev_iostat_ctx;
|
|
|
|
|
|
|
|
assert(bdev != NULL);
|
|
|
|
assert(stat != NULL);
|
|
|
|
assert(cb != NULL);
|
|
|
|
|
|
|
|
bdev_iostat_ctx = calloc(1, sizeof(struct spdk_bdev_iostat_ctx));
|
|
|
|
if (bdev_iostat_ctx == NULL) {
|
|
|
|
SPDK_ERRLOG("Unable to allocate memory for spdk_bdev_iostat_ctx\n");
|
|
|
|
cb(bdev, stat, cb_arg, -ENOMEM);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
bdev_iostat_ctx->stat = stat;
|
|
|
|
bdev_iostat_ctx->cb = cb;
|
|
|
|
bdev_iostat_ctx->cb_arg = cb_arg;
|
|
|
|
|
2018-08-01 18:50:38 +00:00
|
|
|
/* Start with the statistics from previously deleted channels. */
|
|
|
|
pthread_mutex_lock(&bdev->internal.mutex);
|
|
|
|
_spdk_bdev_io_stat_add(bdev_iostat_ctx->stat, &bdev->internal.stat);
|
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
|
|
|
|
|
|
|
/* Then iterate and add the statistics from each existing channel. */
|
2017-12-28 09:03:17 +00:00
|
|
|
spdk_for_each_channel(__bdev_to_io_dev(bdev),
|
|
|
|
_spdk_bdev_get_each_channel_stat,
|
|
|
|
bdev_iostat_ctx,
|
|
|
|
_spdk_bdev_get_device_stat_done);
|
|
|
|
}
|
|
|
|
|
2017-06-05 18:39:38 +00:00
|
|
|
int
|
2017-07-06 19:39:19 +00:00
|
|
|
spdk_bdev_nvme_admin_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
2017-05-13 20:12:13 +00:00
|
|
|
const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
|
|
|
{
|
2019-06-05 08:57:19 +00:00
|
|
|
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
|
2017-05-13 20:12:13 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
|
|
|
|
2017-07-06 19:39:19 +00:00
|
|
|
if (!desc->write) {
|
|
|
|
return -EBADF;
|
|
|
|
}
|
|
|
|
|
2018-04-06 20:59:07 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel);
|
2017-05-13 20:12:13 +00:00
|
|
|
if (!bdev_io) {
|
2017-06-05 18:39:38 +00:00
|
|
|
return -ENOMEM;
|
2017-05-13 20:12:13 +00:00
|
|
|
}
|
|
|
|
|
2018-06-20 17:54:48 +00:00
|
|
|
bdev_io->internal.ch = channel;
|
2018-08-17 22:06:26 +00:00
|
|
|
bdev_io->internal.desc = desc;
|
2017-05-13 20:12:13 +00:00
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_ADMIN;
|
|
|
|
bdev_io->u.nvme_passthru.cmd = *cmd;
|
|
|
|
bdev_io->u.nvme_passthru.buf = buf;
|
|
|
|
bdev_io->u.nvme_passthru.nbytes = nbytes;
|
2017-11-14 06:33:11 +00:00
|
|
|
bdev_io->u.nvme_passthru.md_buf = NULL;
|
|
|
|
bdev_io->u.nvme_passthru.md_len = 0;
|
2017-05-13 20:12:13 +00:00
|
|
|
|
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
2017-09-07 21:51:14 +00:00
|
|
|
spdk_bdev_io_submit(bdev_io);
|
2017-06-05 18:39:38 +00:00
|
|
|
return 0;
|
2017-05-13 20:12:13 +00:00
|
|
|
}
|
|
|
|
|
2017-06-05 18:39:38 +00:00
|
|
|
int
|
2017-07-06 19:39:19 +00:00
|
|
|
spdk_bdev_nvme_io_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
2017-06-05 18:02:09 +00:00
|
|
|
const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
|
|
|
{
|
2019-06-05 08:57:19 +00:00
|
|
|
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
|
2017-06-05 18:02:09 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
|
|
|
|
2017-07-06 19:39:19 +00:00
|
|
|
if (!desc->write) {
|
|
|
|
/*
|
|
|
|
* Do not try to parse the NVMe command - we could maybe use bits in the opcode
|
|
|
|
* to easily determine if the command is a read or write, but for now just
|
|
|
|
* do not allow io_passthru with a read-only descriptor.
|
|
|
|
*/
|
|
|
|
return -EBADF;
|
|
|
|
}
|
|
|
|
|
2018-04-06 20:59:07 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel);
|
2017-06-05 18:02:09 +00:00
|
|
|
if (!bdev_io) {
|
2017-06-05 18:39:38 +00:00
|
|
|
return -ENOMEM;
|
2017-06-05 18:02:09 +00:00
|
|
|
}
|
|
|
|
|
2018-06-20 17:54:48 +00:00
|
|
|
bdev_io->internal.ch = channel;
|
2018-08-17 22:06:26 +00:00
|
|
|
bdev_io->internal.desc = desc;
|
2017-06-05 18:02:09 +00:00
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IO;
|
|
|
|
bdev_io->u.nvme_passthru.cmd = *cmd;
|
|
|
|
bdev_io->u.nvme_passthru.buf = buf;
|
|
|
|
bdev_io->u.nvme_passthru.nbytes = nbytes;
|
2017-11-14 06:33:11 +00:00
|
|
|
bdev_io->u.nvme_passthru.md_buf = NULL;
|
|
|
|
bdev_io->u.nvme_passthru.md_len = 0;
|
|
|
|
|
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
|
|
|
spdk_bdev_io_submit(bdev_io);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
spdk_bdev_nvme_io_passthru_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
|
|
|
const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len,
|
|
|
|
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
|
|
|
{
|
2019-06-05 08:57:19 +00:00
|
|
|
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
|
2017-11-14 06:33:11 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
|
|
|
|
|
|
|
if (!desc->write) {
|
|
|
|
/*
|
|
|
|
* Do not try to parse the NVMe command - we could maybe use bits in the opcode
|
|
|
|
* to easily determine if the command is a read or write, but for now just
|
|
|
|
* do not allow io_passthru with a read-only descriptor.
|
|
|
|
*/
|
|
|
|
return -EBADF;
|
|
|
|
}
|
|
|
|
|
2018-04-06 20:59:07 +00:00
|
|
|
bdev_io = spdk_bdev_get_io(channel);
|
2017-11-14 06:33:11 +00:00
|
|
|
if (!bdev_io) {
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
2018-06-20 17:54:48 +00:00
|
|
|
bdev_io->internal.ch = channel;
|
2018-08-17 22:06:26 +00:00
|
|
|
bdev_io->internal.desc = desc;
|
2017-11-14 06:33:11 +00:00
|
|
|
bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IO_MD;
|
|
|
|
bdev_io->u.nvme_passthru.cmd = *cmd;
|
|
|
|
bdev_io->u.nvme_passthru.buf = buf;
|
|
|
|
bdev_io->u.nvme_passthru.nbytes = nbytes;
|
|
|
|
bdev_io->u.nvme_passthru.md_buf = md_buf;
|
|
|
|
bdev_io->u.nvme_passthru.md_len = md_len;
|
2017-06-05 18:02:09 +00:00
|
|
|
|
|
|
|
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
|
|
|
|
2017-09-07 21:51:14 +00:00
|
|
|
spdk_bdev_io_submit(bdev_io);
|
2017-06-05 18:39:38 +00:00
|
|
|
return 0;
|
2017-06-05 18:02:09 +00:00
|
|
|
}
|
|
|
|
|
2018-06-12 15:11:31 +00:00
|
|
|
int
|
|
|
|
spdk_bdev_queue_io_wait(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
|
|
|
|
struct spdk_bdev_io_wait_entry *entry)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
|
|
|
struct spdk_bdev_mgmt_channel *mgmt_ch = channel->shared_resource->mgmt_ch;
|
|
|
|
|
|
|
|
if (bdev != entry->bdev) {
|
|
|
|
SPDK_ERRLOG("bdevs do not match\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (mgmt_ch->per_thread_cache_count > 0) {
|
|
|
|
SPDK_ERRLOG("Cannot queue io_wait if spdk_bdev_io available in per-thread cache\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
TAILQ_INSERT_TAIL(&mgmt_ch->io_wait_queue, entry, link);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O outstanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_ch_retry_io(struct spdk_bdev_channel *bdev_ch)
|
|
|
|
{
|
|
|
|
struct spdk_bdev *bdev = bdev_ch->bdev;
|
2018-05-04 08:10:52 +00:00
|
|
|
struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource;
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O oustanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
|
|
|
|
2018-05-04 08:10:52 +00:00
|
|
|
if (shared_resource->io_outstanding > shared_resource->nomem_threshold) {
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O oustanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
/*
|
|
|
|
* Allow some more I/O to complete before retrying the nomem_io queue.
|
|
|
|
* Some drivers (such as nvme) cannot immediately take a new I/O in
|
|
|
|
* the context of a completion, because the resources for the I/O are
|
|
|
|
* not released until control returns to the bdev poller. Also, we
|
|
|
|
* may require several small I/O to complete before a larger I/O
|
|
|
|
* (that requires splitting) can be submitted.
|
|
|
|
*/
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2018-05-04 08:10:52 +00:00
|
|
|
while (!TAILQ_EMPTY(&shared_resource->nomem_io)) {
|
|
|
|
bdev_io = TAILQ_FIRST(&shared_resource->nomem_io);
|
2018-06-20 20:58:59 +00:00
|
|
|
TAILQ_REMOVE(&shared_resource->nomem_io, bdev_io, internal.link);
|
2018-06-20 17:54:48 +00:00
|
|
|
bdev_io->internal.ch->io_outstanding++;
|
2018-05-04 08:10:52 +00:00
|
|
|
shared_resource->io_outstanding++;
|
2018-06-19 23:19:49 +00:00
|
|
|
bdev_io->internal.status = SPDK_BDEV_IO_STATUS_PENDING;
|
2019-05-31 07:19:49 +00:00
|
|
|
bdev->fn_table->submit_request(spdk_bdev_io_get_io_channel(bdev_io), bdev_io);
|
2018-06-19 23:19:49 +00:00
|
|
|
if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_NOMEM) {
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O oustanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-03-16 18:17:31 +00:00
|
|
|
static inline void
|
2017-06-15 18:48:27 +00:00
|
|
|
_spdk_bdev_io_complete(void *ctx)
|
2017-01-12 17:58:20 +00:00
|
|
|
{
|
2017-06-12 22:47:30 +00:00
|
|
|
struct spdk_bdev_io *bdev_io = ctx;
|
2018-11-19 13:52:23 +00:00
|
|
|
uint64_t tsc, tsc_diff;
|
2017-01-12 17:58:20 +00:00
|
|
|
|
2018-06-20 17:54:48 +00:00
|
|
|
if (spdk_unlikely(bdev_io->internal.in_submit_request || bdev_io->internal.io_submit_ch)) {
|
2018-03-16 18:17:31 +00:00
|
|
|
/*
|
|
|
|
* Send the completion to the thread that originally submitted the I/O,
|
|
|
|
* which may not be the current thread in the case of QoS.
|
|
|
|
*/
|
2018-06-20 17:54:48 +00:00
|
|
|
if (bdev_io->internal.io_submit_ch) {
|
|
|
|
bdev_io->internal.ch = bdev_io->internal.io_submit_ch;
|
|
|
|
bdev_io->internal.io_submit_ch = NULL;
|
2018-03-16 18:17:31 +00:00
|
|
|
}
|
2017-12-28 03:11:55 +00:00
|
|
|
|
2018-03-16 18:17:31 +00:00
|
|
|
/*
|
|
|
|
* Defer completion to avoid potential infinite recursion if the
|
|
|
|
* user's completion callback issues a new I/O.
|
|
|
|
*/
|
2019-05-31 05:49:38 +00:00
|
|
|
spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io),
|
2018-03-16 18:17:31 +00:00
|
|
|
_spdk_bdev_io_complete, bdev_io);
|
|
|
|
return;
|
2017-12-28 03:11:55 +00:00
|
|
|
}
|
2018-03-16 18:17:31 +00:00
|
|
|
|
2018-08-31 20:13:32 +00:00
|
|
|
tsc = spdk_get_ticks();
|
2018-11-19 13:52:23 +00:00
|
|
|
tsc_diff = tsc - bdev_io->internal.submit_tsc;
|
2018-08-31 20:13:32 +00:00
|
|
|
spdk_trace_record_tsc(tsc, TRACE_BDEV_IO_DONE, 0, 0, (uintptr_t)bdev_io, 0);
|
|
|
|
|
2018-11-19 13:31:19 +00:00
|
|
|
if (bdev_io->internal.ch->histogram) {
|
|
|
|
spdk_histogram_data_tally(bdev_io->internal.ch->histogram, tsc_diff);
|
|
|
|
}
|
|
|
|
|
2018-06-19 23:19:49 +00:00
|
|
|
if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS) {
|
2018-04-19 20:27:17 +00:00
|
|
|
switch (bdev_io->type) {
|
|
|
|
case SPDK_BDEV_IO_TYPE_READ:
|
2018-06-20 17:54:48 +00:00
|
|
|
bdev_io->internal.ch->stat.bytes_read += bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;
|
|
|
|
bdev_io->internal.ch->stat.num_read_ops++;
|
2018-11-19 13:52:23 +00:00
|
|
|
bdev_io->internal.ch->stat.read_latency_ticks += tsc_diff;
|
2018-04-19 20:27:17 +00:00
|
|
|
break;
|
|
|
|
case SPDK_BDEV_IO_TYPE_WRITE:
|
2018-06-20 17:54:48 +00:00
|
|
|
bdev_io->internal.ch->stat.bytes_written += bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;
|
|
|
|
bdev_io->internal.ch->stat.num_write_ops++;
|
2018-11-19 13:52:23 +00:00
|
|
|
bdev_io->internal.ch->stat.write_latency_ticks += tsc_diff;
|
2018-04-19 20:27:17 +00:00
|
|
|
break;
|
2019-01-04 02:45:53 +00:00
|
|
|
case SPDK_BDEV_IO_TYPE_UNMAP:
|
|
|
|
bdev_io->internal.ch->stat.bytes_unmapped += bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;
|
|
|
|
bdev_io->internal.ch->stat.num_unmap_ops++;
|
|
|
|
bdev_io->internal.ch->stat.unmap_latency_ticks += tsc_diff;
|
2018-04-19 20:27:17 +00:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef SPDK_CONFIG_VTUNE
|
|
|
|
uint64_t now_tsc = spdk_get_ticks();
|
2018-06-20 17:54:48 +00:00
|
|
|
if (now_tsc > (bdev_io->internal.ch->start_tsc + bdev_io->internal.ch->interval_tsc)) {
|
2018-04-19 20:27:17 +00:00
|
|
|
uint64_t data[5];
|
|
|
|
|
2018-06-20 17:54:48 +00:00
|
|
|
data[0] = bdev_io->internal.ch->stat.num_read_ops - bdev_io->internal.ch->prev_stat.num_read_ops;
|
|
|
|
data[1] = bdev_io->internal.ch->stat.bytes_read - bdev_io->internal.ch->prev_stat.bytes_read;
|
|
|
|
data[2] = bdev_io->internal.ch->stat.num_write_ops - bdev_io->internal.ch->prev_stat.num_write_ops;
|
|
|
|
data[3] = bdev_io->internal.ch->stat.bytes_written - bdev_io->internal.ch->prev_stat.bytes_written;
|
2018-04-19 20:27:17 +00:00
|
|
|
data[4] = bdev_io->bdev->fn_table->get_spin_time ?
|
2019-05-31 07:19:49 +00:00
|
|
|
bdev_io->bdev->fn_table->get_spin_time(spdk_bdev_io_get_io_channel(bdev_io)) : 0;
|
2018-04-19 20:27:17 +00:00
|
|
|
|
2018-06-20 17:54:48 +00:00
|
|
|
__itt_metadata_add(g_bdev_mgr.domain, __itt_null, bdev_io->internal.ch->handle,
|
2018-04-19 20:27:17 +00:00
|
|
|
__itt_metadata_u64, 5, data);
|
|
|
|
|
2018-06-20 17:54:48 +00:00
|
|
|
bdev_io->internal.ch->prev_stat = bdev_io->internal.ch->stat;
|
|
|
|
bdev_io->internal.ch->start_tsc = now_tsc;
|
2018-04-19 20:27:17 +00:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2018-06-19 23:57:20 +00:00
|
|
|
assert(bdev_io->internal.cb != NULL);
|
2019-05-31 05:49:38 +00:00
|
|
|
assert(spdk_get_thread() == spdk_bdev_io_get_thread(bdev_io));
|
2018-03-16 18:17:31 +00:00
|
|
|
|
2018-06-19 23:57:20 +00:00
|
|
|
bdev_io->internal.cb(bdev_io, bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS,
|
|
|
|
bdev_io->internal.caller_ctx);
|
2017-01-12 17:58:20 +00:00
|
|
|
}
|
|
|
|
|
2017-12-06 21:52:20 +00:00
|
|
|
static void
|
2017-12-11 22:14:19 +00:00
|
|
|
_spdk_bdev_reset_complete(struct spdk_io_channel_iter *i, int status)
|
2017-12-06 21:52:20 +00:00
|
|
|
{
|
2017-12-11 22:14:19 +00:00
|
|
|
struct spdk_bdev_io *bdev_io = spdk_io_channel_iter_get_ctx(i);
|
2017-12-06 21:52:20 +00:00
|
|
|
|
|
|
|
if (bdev_io->u.reset.ch_ref != NULL) {
|
|
|
|
spdk_put_io_channel(bdev_io->u.reset.ch_ref);
|
|
|
|
bdev_io->u.reset.ch_ref = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
_spdk_bdev_io_complete(bdev_io);
|
|
|
|
}
|
|
|
|
|
2017-12-11 22:14:19 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_unfreeze_channel(struct spdk_io_channel_iter *i)
|
2017-12-06 21:52:20 +00:00
|
|
|
{
|
2017-12-11 22:14:19 +00:00
|
|
|
struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
|
2017-12-06 21:52:20 +00:00
|
|
|
struct spdk_bdev_channel *ch = spdk_io_channel_get_ctx(_ch);
|
|
|
|
|
|
|
|
ch->flags &= ~BDEV_CH_RESET_IN_PROGRESS;
|
|
|
|
if (!TAILQ_EMPTY(&ch->queued_resets)) {
|
|
|
|
_spdk_bdev_channel_start_reset(ch);
|
|
|
|
}
|
|
|
|
|
2017-12-11 22:14:19 +00:00
|
|
|
spdk_for_each_channel_continue(i, 0);
|
2017-12-06 21:52:20 +00:00
|
|
|
}
|
|
|
|
|
2016-07-20 18:16:23 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status)
|
|
|
|
{
|
2017-09-15 22:23:49 +00:00
|
|
|
struct spdk_bdev *bdev = bdev_io->bdev;
|
2018-06-20 17:54:48 +00:00
|
|
|
struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch;
|
2018-05-04 08:10:52 +00:00
|
|
|
struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource;
|
2017-09-15 22:23:49 +00:00
|
|
|
|
2018-06-19 23:19:49 +00:00
|
|
|
bdev_io->internal.status = status;
|
2017-06-12 22:47:30 +00:00
|
|
|
|
2017-09-14 21:02:09 +00:00
|
|
|
if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_RESET)) {
|
2017-12-06 21:52:20 +00:00
|
|
|
bool unlock_channels = false;
|
|
|
|
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O oustanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
if (status == SPDK_BDEV_IO_STATUS_NOMEM) {
|
|
|
|
SPDK_ERRLOG("NOMEM returned for reset\n");
|
|
|
|
}
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_lock(&bdev->internal.mutex);
|
|
|
|
if (bdev_io == bdev->internal.reset_in_progress) {
|
|
|
|
bdev->internal.reset_in_progress = NULL;
|
2017-12-06 21:52:20 +00:00
|
|
|
unlock_channels = true;
|
2017-09-12 19:14:48 +00:00
|
|
|
}
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
2017-12-06 21:52:20 +00:00
|
|
|
|
|
|
|
if (unlock_channels) {
|
2018-02-19 22:21:15 +00:00
|
|
|
spdk_for_each_channel(__bdev_to_io_dev(bdev), _spdk_bdev_unfreeze_channel,
|
|
|
|
bdev_io, _spdk_bdev_reset_complete);
|
2017-12-06 21:52:20 +00:00
|
|
|
return;
|
2017-09-12 16:34:55 +00:00
|
|
|
}
|
2017-09-14 21:02:09 +00:00
|
|
|
} else {
|
2019-04-16 08:12:09 +00:00
|
|
|
_bdev_io_unset_bounce_buf(bdev_io);
|
2018-10-12 07:46:14 +00:00
|
|
|
|
2018-03-16 12:20:55 +00:00
|
|
|
assert(bdev_ch->io_outstanding > 0);
|
2018-05-04 08:10:52 +00:00
|
|
|
assert(shared_resource->io_outstanding > 0);
|
2018-03-16 12:20:55 +00:00
|
|
|
bdev_ch->io_outstanding--;
|
2018-05-04 08:10:52 +00:00
|
|
|
shared_resource->io_outstanding--;
|
2018-03-16 17:40:47 +00:00
|
|
|
|
|
|
|
if (spdk_unlikely(status == SPDK_BDEV_IO_STATUS_NOMEM)) {
|
2018-06-20 20:58:59 +00:00
|
|
|
TAILQ_INSERT_HEAD(&shared_resource->nomem_io, bdev_io, internal.link);
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O oustanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
/*
|
|
|
|
* Wait for some of the outstanding I/O to complete before we
|
|
|
|
* retry any of the nomem_io. Normally we will wait for
|
|
|
|
* NOMEM_THRESHOLD_COUNT I/O to complete but for low queue
|
|
|
|
* depth channels we will instead wait for half to complete.
|
|
|
|
*/
|
2018-05-04 08:10:52 +00:00
|
|
|
shared_resource->nomem_threshold = spdk_max((int64_t)shared_resource->io_outstanding / 2,
|
|
|
|
(int64_t)shared_resource->io_outstanding - NOMEM_THRESHOLD_COUNT);
|
bdev: add ENOMEM handling
At very high queue depths, bdev modules may not have enough
internal resources to track all of the incoming I/O. For example,
we allocate a finite number of nvme_request objects per allocated
queue pair. Currently if these resources are exhausted, the
bdev module will return failure (with no indication why) which
gets propagated all the way back to the application.
So instead, add SPDK_BDEV_IO_STATUS_NOMEM to allow bdev modules
to indicate this type of failure. Also add handling for this
status type in the generic bdev layer, involving queuing these
I/O for later retry after other I/O on the failing channel have
completed.
This does place an expectation on the bdev module that these
internal resources are allocated per io_channel. Otherwise we
cannot guarantee forward progress solely on reception of
completions. For example, without this guarantee, a bdev
module could theoretically return ENOMEM even if there were
no I/O oustanding for that io_channel. nvme, aio, rbd,
virtio and null drivers comply with this expectation already.
malloc only complies though when not using copy offload.
This patch will fix malloc w/ copy engine to at least
return ENOMEM when no copy descriptors are available. If the
condition above occurs, I/O waiting for resources will get
failed as part of a subsequent reset which matches the
behavior it has today.
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iea7cd51a611af8abe882794d0b2361fdbb74e84e
Reviewed-on: https://review.gerrithub.io/378853
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2017-09-15 20:47:17 +00:00
|
|
|
return;
|
|
|
|
}
|
2018-03-16 17:40:47 +00:00
|
|
|
|
2018-05-04 08:10:52 +00:00
|
|
|
if (spdk_unlikely(!TAILQ_EMPTY(&shared_resource->nomem_io))) {
|
2018-03-16 17:40:47 +00:00
|
|
|
_spdk_bdev_ch_retry_io(bdev_ch);
|
|
|
|
}
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2018-03-16 18:17:31 +00:00
|
|
|
_spdk_bdev_io_complete(bdev_io);
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2016-11-07 20:14:47 +00:00
|
|
|
void
|
2017-01-18 22:20:31 +00:00
|
|
|
spdk_bdev_io_complete_scsi_status(struct spdk_bdev_io *bdev_io, enum spdk_scsi_status sc,
|
|
|
|
enum spdk_scsi_sense sk, uint8_t asc, uint8_t ascq)
|
|
|
|
{
|
|
|
|
if (sc == SPDK_SCSI_STATUS_GOOD) {
|
2018-06-19 23:19:49 +00:00
|
|
|
bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
|
2017-01-18 22:20:31 +00:00
|
|
|
} else {
|
2018-06-19 23:19:49 +00:00
|
|
|
bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SCSI_ERROR;
|
2018-06-20 23:38:35 +00:00
|
|
|
bdev_io->internal.error.scsi.sc = sc;
|
|
|
|
bdev_io->internal.error.scsi.sk = sk;
|
|
|
|
bdev_io->internal.error.scsi.asc = asc;
|
|
|
|
bdev_io->internal.error.scsi.ascq = ascq;
|
2017-01-18 22:20:31 +00:00
|
|
|
}
|
|
|
|
|
2018-06-19 23:19:49 +00:00
|
|
|
spdk_bdev_io_complete(bdev_io, bdev_io->internal.status);
|
2016-11-07 20:14:47 +00:00
|
|
|
}
|
|
|
|
|
2017-01-18 22:15:35 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_io_get_scsi_status(const struct spdk_bdev_io *bdev_io,
|
|
|
|
int *sc, int *sk, int *asc, int *ascq)
|
|
|
|
{
|
|
|
|
assert(sc != NULL);
|
|
|
|
assert(sk != NULL);
|
|
|
|
assert(asc != NULL);
|
|
|
|
assert(ascq != NULL);
|
|
|
|
|
2018-06-19 23:19:49 +00:00
|
|
|
switch (bdev_io->internal.status) {
|
2017-01-18 22:15:35 +00:00
|
|
|
case SPDK_BDEV_IO_STATUS_SUCCESS:
|
|
|
|
*sc = SPDK_SCSI_STATUS_GOOD;
|
|
|
|
*sk = SPDK_SCSI_SENSE_NO_SENSE;
|
|
|
|
*asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
|
|
|
|
*ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
|
|
|
|
break;
|
|
|
|
case SPDK_BDEV_IO_STATUS_NVME_ERROR:
|
|
|
|
spdk_scsi_nvme_translate(bdev_io, sc, sk, asc, ascq);
|
|
|
|
break;
|
|
|
|
case SPDK_BDEV_IO_STATUS_SCSI_ERROR:
|
2018-06-20 23:38:35 +00:00
|
|
|
*sc = bdev_io->internal.error.scsi.sc;
|
|
|
|
*sk = bdev_io->internal.error.scsi.sk;
|
|
|
|
*asc = bdev_io->internal.error.scsi.asc;
|
|
|
|
*ascq = bdev_io->internal.error.scsi.ascq;
|
2017-01-18 22:15:35 +00:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
*sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
|
|
|
|
*sk = SPDK_SCSI_SENSE_ABORTED_COMMAND;
|
|
|
|
*asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
|
|
|
|
*ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-01-18 21:43:15 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_io_complete_nvme_status(struct spdk_bdev_io *bdev_io, int sct, int sc)
|
|
|
|
{
|
|
|
|
if (sct == SPDK_NVME_SCT_GENERIC && sc == SPDK_NVME_SC_SUCCESS) {
|
2018-06-19 23:19:49 +00:00
|
|
|
bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
|
2017-01-18 21:43:15 +00:00
|
|
|
} else {
|
2018-06-20 23:38:35 +00:00
|
|
|
bdev_io->internal.error.nvme.sct = sct;
|
|
|
|
bdev_io->internal.error.nvme.sc = sc;
|
2018-06-19 23:19:49 +00:00
|
|
|
bdev_io->internal.status = SPDK_BDEV_IO_STATUS_NVME_ERROR;
|
2017-01-18 21:43:15 +00:00
|
|
|
}
|
|
|
|
|
2018-06-19 23:19:49 +00:00
|
|
|
spdk_bdev_io_complete(bdev_io, bdev_io->internal.status);
|
2017-01-18 21:43:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
spdk_bdev_io_get_nvme_status(const struct spdk_bdev_io *bdev_io, int *sct, int *sc)
|
|
|
|
{
|
|
|
|
assert(sct != NULL);
|
|
|
|
assert(sc != NULL);
|
|
|
|
|
2018-06-19 23:19:49 +00:00
|
|
|
if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_NVME_ERROR) {
|
2018-06-20 23:38:35 +00:00
|
|
|
*sct = bdev_io->internal.error.nvme.sct;
|
|
|
|
*sc = bdev_io->internal.error.nvme.sc;
|
2018-06-19 23:19:49 +00:00
|
|
|
} else if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS) {
|
2017-01-18 21:43:15 +00:00
|
|
|
*sct = SPDK_NVME_SCT_GENERIC;
|
|
|
|
*sc = SPDK_NVME_SC_SUCCESS;
|
|
|
|
} else {
|
|
|
|
*sct = SPDK_NVME_SCT_GENERIC;
|
|
|
|
*sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-11 16:47:52 +00:00
|
|
|
struct spdk_thread *
|
|
|
|
spdk_bdev_io_get_thread(struct spdk_bdev_io *bdev_io)
|
|
|
|
{
|
2018-06-20 17:54:48 +00:00
|
|
|
return spdk_io_channel_get_thread(bdev_io->internal.ch->channel);
|
2017-10-11 16:47:52 +00:00
|
|
|
}
|
|
|
|
|
2019-02-08 05:20:52 +00:00
|
|
|
struct spdk_io_channel *
|
|
|
|
spdk_bdev_io_get_io_channel(struct spdk_bdev_io *bdev_io)
|
|
|
|
{
|
|
|
|
return bdev_io->internal.ch->channel;
|
|
|
|
}
|
|
|
|
|
2018-06-04 06:13:06 +00:00
|
|
|
static void
|
2018-09-04 15:01:51 +00:00
|
|
|
_spdk_bdev_qos_config_limit(struct spdk_bdev *bdev, uint64_t *limits)
|
2018-06-04 06:13:06 +00:00
|
|
|
{
|
2018-09-04 15:01:51 +00:00
|
|
|
uint64_t min_qos_set;
|
|
|
|
int i;
|
2018-06-04 06:13:06 +00:00
|
|
|
|
2018-09-04 15:01:51 +00:00
|
|
|
for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
|
|
|
|
if (limits[i] != SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
|
|
|
|
break;
|
|
|
|
}
|
2018-06-04 06:13:06 +00:00
|
|
|
}
|
|
|
|
|
2018-09-04 15:01:51 +00:00
|
|
|
if (i == SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES) {
|
|
|
|
SPDK_ERRLOG("Invalid rate limits set.\n");
|
2018-06-04 06:13:06 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2018-09-04 15:01:51 +00:00
|
|
|
for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
|
|
|
|
if (limits[i] == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (_spdk_bdev_qos_is_iops_rate_limit(i) == true) {
|
|
|
|
min_qos_set = SPDK_BDEV_QOS_MIN_IOS_PER_SEC;
|
|
|
|
} else {
|
|
|
|
min_qos_set = SPDK_BDEV_QOS_MIN_BYTES_PER_SEC;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (limits[i] == 0 || limits[i] % min_qos_set) {
|
|
|
|
SPDK_ERRLOG("Assigned limit %" PRIu64 " on bdev %s is not multiple of %" PRIu64 "\n",
|
|
|
|
limits[i], bdev->name, min_qos_set);
|
|
|
|
SPDK_ERRLOG("Failed to enable QoS on this bdev %s\n", bdev->name);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
if (!bdev->internal.qos) {
|
|
|
|
bdev->internal.qos = calloc(1, sizeof(*bdev->internal.qos));
|
|
|
|
if (!bdev->internal.qos) {
|
2018-06-04 06:13:06 +00:00
|
|
|
SPDK_ERRLOG("Unable to allocate memory for QoS tracking\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-04 15:01:51 +00:00
|
|
|
for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
|
|
|
|
bdev->internal.qos->rate_limits[i].limit = limits[i];
|
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Bdev:%s QoS type:%d set:%lu\n",
|
|
|
|
bdev->name, i, limits[i]);
|
2018-06-04 06:13:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2017-12-28 21:49:46 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_qos_config(struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
struct spdk_conf_section *sp = NULL;
|
|
|
|
const char *val = NULL;
|
2018-06-04 06:13:06 +00:00
|
|
|
int i = 0, j = 0;
|
2018-09-04 15:01:51 +00:00
|
|
|
uint64_t limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES] = {};
|
|
|
|
bool config_qos = false;
|
2017-12-28 21:49:46 +00:00
|
|
|
|
|
|
|
sp = spdk_conf_find_section(NULL, "QoS");
|
|
|
|
if (!sp) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2018-09-04 15:01:51 +00:00
|
|
|
while (j < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES) {
|
|
|
|
limits[j] = SPDK_BDEV_QOS_LIMIT_NOT_DEFINED;
|
|
|
|
|
2018-06-04 06:13:06 +00:00
|
|
|
i = 0;
|
|
|
|
while (true) {
|
2018-09-04 15:01:51 +00:00
|
|
|
val = spdk_conf_section_get_nmval(sp, qos_conf_type[j], i, 0);
|
2018-06-04 06:13:06 +00:00
|
|
|
if (!val) {
|
|
|
|
break;
|
|
|
|
}
|
2017-12-28 21:49:46 +00:00
|
|
|
|
2018-06-04 06:13:06 +00:00
|
|
|
if (strcmp(bdev->name, val) != 0) {
|
|
|
|
i++;
|
|
|
|
continue;
|
|
|
|
}
|
2017-12-28 21:49:46 +00:00
|
|
|
|
2018-09-04 15:01:51 +00:00
|
|
|
val = spdk_conf_section_get_nmval(sp, qos_conf_type[j], i, 1);
|
2018-06-04 06:13:06 +00:00
|
|
|
if (val) {
|
2018-09-04 15:01:51 +00:00
|
|
|
if (_spdk_bdev_qos_is_iops_rate_limit(j) == true) {
|
|
|
|
limits[j] = strtoull(val, NULL, 10);
|
|
|
|
} else {
|
|
|
|
limits[j] = strtoull(val, NULL, 10) * 1024 * 1024;
|
|
|
|
}
|
|
|
|
config_qos = true;
|
2017-12-28 21:49:46 +00:00
|
|
|
}
|
2018-06-04 06:13:06 +00:00
|
|
|
|
|
|
|
break;
|
2017-12-28 21:49:46 +00:00
|
|
|
}
|
|
|
|
|
2018-06-04 06:13:06 +00:00
|
|
|
j++;
|
2017-12-28 21:49:46 +00:00
|
|
|
}
|
2018-06-04 06:13:06 +00:00
|
|
|
|
2018-09-04 15:01:51 +00:00
|
|
|
if (config_qos == true) {
|
|
|
|
_spdk_bdev_qos_config_limit(bdev, limits);
|
|
|
|
}
|
|
|
|
|
2018-06-04 06:13:06 +00:00
|
|
|
return;
|
2017-12-28 21:49:46 +00:00
|
|
|
}
|
|
|
|
|
2017-11-20 09:31:39 +00:00
|
|
|
static int
|
2018-03-23 19:35:21 +00:00
|
|
|
spdk_bdev_init(struct spdk_bdev *bdev)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2018-08-30 20:26:50 +00:00
|
|
|
char *bdev_name;
|
|
|
|
|
2017-07-07 00:36:17 +00:00
|
|
|
assert(bdev->module != NULL);
|
|
|
|
|
2017-11-20 09:31:39 +00:00
|
|
|
if (!bdev->name) {
|
|
|
|
SPDK_ERRLOG("Bdev name is NULL\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2019-07-02 14:13:31 +00:00
|
|
|
if (!strlen(bdev->name)) {
|
|
|
|
SPDK_ERRLOG("Bdev name must not be an empty string\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2017-11-20 09:31:39 +00:00
|
|
|
if (spdk_bdev_get_by_name(bdev->name)) {
|
|
|
|
SPDK_ERRLOG("Bdev name:%s already exists\n", bdev->name);
|
|
|
|
return -EEXIST;
|
|
|
|
}
|
|
|
|
|
2018-08-30 20:26:50 +00:00
|
|
|
/* Users often register their own I/O devices using the bdev name. In
|
|
|
|
* order to avoid conflicts, prepend bdev_. */
|
|
|
|
bdev_name = spdk_sprintf_alloc("bdev_%s", bdev->name);
|
|
|
|
if (!bdev_name) {
|
|
|
|
SPDK_ERRLOG("Unable to allocate memory for internal bdev name.\n");
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
bdev->internal.status = SPDK_BDEV_STATUS_READY;
|
2018-07-05 17:44:30 +00:00
|
|
|
bdev->internal.measured_queue_depth = UINT64_MAX;
|
2018-09-26 10:35:12 +00:00
|
|
|
bdev->internal.claim_module = NULL;
|
|
|
|
bdev->internal.qd_poller = NULL;
|
|
|
|
bdev->internal.qos = NULL;
|
2017-06-29 18:23:50 +00:00
|
|
|
|
2019-06-27 09:44:24 +00:00
|
|
|
/* If the user didn't specify a uuid, generate one. */
|
|
|
|
if (spdk_mem_all_zero(&bdev->uuid, sizeof(bdev->uuid))) {
|
|
|
|
spdk_uuid_generate(&bdev->uuid);
|
|
|
|
}
|
|
|
|
|
2018-10-24 13:12:30 +00:00
|
|
|
if (spdk_bdev_get_buf_align(bdev) > 1) {
|
|
|
|
if (bdev->split_on_optimal_io_boundary) {
|
|
|
|
bdev->optimal_io_boundary = spdk_min(bdev->optimal_io_boundary,
|
|
|
|
SPDK_BDEV_LARGE_BUF_MAX_SIZE / bdev->blocklen);
|
|
|
|
} else {
|
|
|
|
bdev->split_on_optimal_io_boundary = true;
|
|
|
|
bdev->optimal_io_boundary = SPDK_BDEV_LARGE_BUF_MAX_SIZE / bdev->blocklen;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
TAILQ_INIT(&bdev->internal.open_descs);
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2017-11-29 15:13:17 +00:00
|
|
|
TAILQ_INIT(&bdev->aliases);
|
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
bdev->internal.reset_in_progress = NULL;
|
2017-06-15 14:17:12 +00:00
|
|
|
|
2017-12-28 21:49:46 +00:00
|
|
|
_spdk_bdev_qos_config(bdev);
|
|
|
|
|
2018-02-19 22:21:15 +00:00
|
|
|
spdk_io_device_register(__bdev_to_io_dev(bdev),
|
|
|
|
spdk_bdev_channel_create, spdk_bdev_channel_destroy,
|
2018-08-30 20:26:50 +00:00
|
|
|
sizeof(struct spdk_bdev_channel),
|
|
|
|
bdev_name);
|
|
|
|
|
|
|
|
free(bdev_name);
|
2017-04-04 21:01:54 +00:00
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_init(&bdev->internal.mutex, NULL);
|
2018-03-23 19:35:21 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-04-03 21:27:23 +00:00
|
|
|
static void
|
|
|
|
spdk_bdev_destroy_cb(void *io_device)
|
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
struct spdk_bdev *bdev;
|
|
|
|
spdk_bdev_unregister_cb cb_fn;
|
|
|
|
void *cb_arg;
|
|
|
|
|
|
|
|
bdev = __bdev_from_io_dev(io_device);
|
2018-06-21 20:03:02 +00:00
|
|
|
cb_fn = bdev->internal.unregister_cb;
|
|
|
|
cb_arg = bdev->internal.unregister_ctx;
|
2018-04-03 21:27:23 +00:00
|
|
|
|
|
|
|
rc = bdev->fn_table->destruct(bdev->ctxt);
|
|
|
|
if (rc < 0) {
|
|
|
|
SPDK_ERRLOG("destruct failed\n");
|
|
|
|
}
|
|
|
|
if (rc <= 0 && cb_fn != NULL) {
|
|
|
|
cb_fn(cb_arg, rc);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2018-03-23 19:35:21 +00:00
|
|
|
static void
|
|
|
|
spdk_bdev_fini(struct spdk_bdev *bdev)
|
|
|
|
{
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_destroy(&bdev->internal.mutex);
|
2018-03-23 19:35:21 +00:00
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
free(bdev->internal.qos);
|
2018-04-24 22:44:14 +00:00
|
|
|
|
2018-04-03 21:27:23 +00:00
|
|
|
spdk_io_device_unregister(__bdev_to_io_dev(bdev), spdk_bdev_destroy_cb);
|
2018-03-23 19:35:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
spdk_bdev_start(struct spdk_bdev *bdev)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_module *module;
|
2018-06-26 12:07:11 +00:00
|
|
|
uint32_t action;
|
2018-03-23 19:35:21 +00:00
|
|
|
|
2017-08-30 18:06:33 +00:00
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Inserting bdev %s into list\n", bdev->name);
|
2018-06-21 20:03:02 +00:00
|
|
|
TAILQ_INSERT_TAIL(&g_bdev_mgr.bdevs, bdev, internal.link);
|
2017-02-10 20:18:49 +00:00
|
|
|
|
2018-06-26 12:07:11 +00:00
|
|
|
/* Examine configuration before initializing I/O */
|
2018-06-21 20:32:40 +00:00
|
|
|
TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, internal.tailq) {
|
2018-06-26 12:07:11 +00:00
|
|
|
if (module->examine_config) {
|
|
|
|
action = module->internal.action_in_progress;
|
2018-06-21 20:32:40 +00:00
|
|
|
module->internal.action_in_progress++;
|
2018-06-26 12:07:11 +00:00
|
|
|
module->examine_config(bdev);
|
|
|
|
if (action != module->internal.action_in_progress) {
|
|
|
|
SPDK_ERRLOG("examine_config for module %s did not call spdk_bdev_module_examine_done()\n",
|
|
|
|
module->name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bdev->internal.claim_module) {
|
2019-03-15 09:32:24 +00:00
|
|
|
if (bdev->internal.claim_module->examine_disk) {
|
|
|
|
bdev->internal.claim_module->internal.action_in_progress++;
|
|
|
|
bdev->internal.claim_module->examine_disk(bdev);
|
|
|
|
}
|
2018-06-26 12:07:11 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, internal.tailq) {
|
|
|
|
if (module->examine_disk) {
|
|
|
|
module->internal.action_in_progress++;
|
|
|
|
module->examine_disk(bdev);
|
2017-07-13 04:06:22 +00:00
|
|
|
}
|
2017-02-10 20:18:49 +00:00
|
|
|
}
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-11-20 09:31:39 +00:00
|
|
|
/*
 * Register a bdev with the bdev layer.
 *
 * Initializes the bdev and, on success, starts it (list insertion plus
 * module examination) and emits a "bdev_register" notification.
 *
 * The notification is only sent when initialization succeeded, so listeners
 * are never told about a bdev that failed to register.
 *
 * \param bdev Block device to register.
 * \return 0 on success, otherwise the error returned by spdk_bdev_init().
 */
int
spdk_bdev_register(struct spdk_bdev *bdev)
{
	int rc = spdk_bdev_init(bdev);

	if (rc != 0) {
		return rc;
	}

	spdk_bdev_start(bdev);
	spdk_notify_send("bdev_register", spdk_bdev_get_name(bdev));

	return 0;
}
|
|
|
|
|
2017-11-20 09:31:39 +00:00
|
|
|
/*
 * Deprecated entry point kept for compatibility. Logs a deprecation message
 * and forwards to spdk_bdev_register(); base_bdevs and base_bdev_count are
 * not used.
 *
 * \return Whatever spdk_bdev_register() returns for vbdev.
 */
int
spdk_vbdev_register(struct spdk_bdev *vbdev, struct spdk_bdev **base_bdevs, int base_bdev_count)
{
	int rc;

	SPDK_ERRLOG("This function is deprecated. Use spdk_bdev_register() instead.\n");

	rc = spdk_bdev_register(vbdev);
	return rc;
}
|
|
|
|
|
2016-07-20 18:16:23 +00:00
|
|
|
void
|
2018-02-22 12:29:49 +00:00
|
|
|
spdk_bdev_destruct_done(struct spdk_bdev *bdev, int bdeverrno)
|
2017-10-25 09:11:59 +00:00
|
|
|
{
|
2018-06-21 20:03:02 +00:00
|
|
|
if (bdev->internal.unregister_cb != NULL) {
|
|
|
|
bdev->internal.unregister_cb(bdev->internal.unregister_ctx, bdeverrno);
|
2017-10-25 09:11:59 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-02-15 23:12:15 +00:00
|
|
|
static void
|
|
|
|
_remove_notify(void *arg)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_desc *desc = arg;
|
|
|
|
|
2018-08-24 07:46:11 +00:00
|
|
|
desc->remove_scheduled = false;
|
|
|
|
|
|
|
|
if (desc->closed) {
|
|
|
|
free(desc);
|
|
|
|
} else {
|
|
|
|
desc->remove_cb(desc->remove_ctx);
|
|
|
|
}
|
2018-02-15 23:12:15 +00:00
|
|
|
}
|
|
|
|
|
2019-02-22 13:28:43 +00:00
|
|
|
/* Must be called while holding bdev->internal.mutex.
|
|
|
|
* returns: 0 - bdev removed and ready to be destructed.
|
|
|
|
* -EBUSY - bdev can't be destructed yet. */
|
|
|
|
static int
|
|
|
|
spdk_bdev_unregister_unsafe(struct spdk_bdev *bdev)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2017-06-29 18:23:50 +00:00
|
|
|
struct spdk_bdev_desc *desc, *tmp;
|
2019-02-22 13:28:43 +00:00
|
|
|
int rc = 0;
|
2017-06-29 18:23:50 +00:00
|
|
|
|
2019-04-04 10:26:53 +00:00
|
|
|
/* Notify each descriptor about hotremoval */
|
2018-06-21 20:03:02 +00:00
|
|
|
TAILQ_FOREACH_SAFE(desc, &bdev->internal.open_descs, link, tmp) {
|
2019-04-04 10:26:53 +00:00
|
|
|
rc = -EBUSY;
|
2017-06-29 18:23:50 +00:00
|
|
|
if (desc->remove_cb) {
|
2018-02-15 23:12:15 +00:00
|
|
|
/*
|
|
|
|
* Defer invocation of the remove_cb to a separate message that will
|
2018-09-05 23:14:47 +00:00
|
|
|
* run later on its thread. This ensures this context unwinds and
|
2018-02-15 23:12:15 +00:00
|
|
|
* we don't recursively unregister this bdev again if the remove_cb
|
|
|
|
* immediately closes its descriptor.
|
|
|
|
*/
|
2018-06-06 13:46:13 +00:00
|
|
|
if (!desc->remove_scheduled) {
|
|
|
|
/* Avoid scheduling removal of the same descriptor multiple times. */
|
|
|
|
desc->remove_scheduled = true;
|
2018-09-05 23:14:47 +00:00
|
|
|
spdk_thread_send_msg(desc->thread, _remove_notify, desc);
|
2018-06-06 13:46:13 +00:00
|
|
|
}
|
2017-01-25 01:04:14 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-04-04 10:26:53 +00:00
|
|
|
/* If there are no descriptors, proceed removing the bdev */
|
2019-02-22 13:28:43 +00:00
|
|
|
if (rc == 0) {
|
|
|
|
TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, internal.link);
|
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Removing bdev %s from list done\n", bdev->name);
|
2019-03-22 11:19:42 +00:00
|
|
|
spdk_notify_send("bdev_unregister", spdk_bdev_get_name(bdev));
|
2019-02-22 13:28:43 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void *cb_arg)
|
|
|
|
{
|
|
|
|
struct spdk_thread *thread;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Removing bdev %s from list\n", bdev->name);
|
|
|
|
|
|
|
|
thread = spdk_get_thread();
|
|
|
|
if (!thread) {
|
|
|
|
/* The user called this from a non-SPDK thread. */
|
|
|
|
if (cb_fn != NULL) {
|
|
|
|
cb_fn(cb_arg, -ENOTSUP);
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2019-06-18 12:18:32 +00:00
|
|
|
pthread_mutex_lock(&g_bdev_mgr.mutex);
|
2019-02-22 13:28:43 +00:00
|
|
|
pthread_mutex_lock(&bdev->internal.mutex);
|
|
|
|
if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING) {
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
2019-06-18 12:18:32 +00:00
|
|
|
pthread_mutex_unlock(&g_bdev_mgr.mutex);
|
2019-02-22 13:28:43 +00:00
|
|
|
if (cb_fn) {
|
|
|
|
cb_fn(cb_arg, -EBUSY);
|
|
|
|
}
|
2017-06-29 18:23:50 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2019-02-22 13:28:43 +00:00
|
|
|
bdev->internal.status = SPDK_BDEV_STATUS_REMOVING;
|
|
|
|
bdev->internal.unregister_cb = cb_fn;
|
|
|
|
bdev->internal.unregister_ctx = cb_arg;
|
|
|
|
|
|
|
|
/* Call under lock. */
|
|
|
|
rc = spdk_bdev_unregister_unsafe(bdev);
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
2019-06-18 12:18:32 +00:00
|
|
|
pthread_mutex_unlock(&g_bdev_mgr.mutex);
|
2016-07-20 18:16:23 +00:00
|
|
|
|
2019-02-22 13:28:43 +00:00
|
|
|
if (rc == 0) {
|
|
|
|
spdk_bdev_fini(bdev);
|
|
|
|
}
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
|
|
|
|
2019-07-25 07:42:19 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_dummy_event_cb(void *remove_ctx)
|
|
|
|
{
|
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Bdev remove event received with no remove callback specified");
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
_spdk_bdev_open(struct spdk_bdev *bdev, bool write, struct spdk_bdev_desc *desc)
|
2017-06-29 18:23:50 +00:00
|
|
|
{
|
2018-09-05 23:14:47 +00:00
|
|
|
struct spdk_thread *thread;
|
2019-02-01 19:21:03 +00:00
|
|
|
struct set_qos_limit_ctx *ctx;
|
2018-09-05 23:14:47 +00:00
|
|
|
|
|
|
|
thread = spdk_get_thread();
|
|
|
|
if (!thread) {
|
|
|
|
SPDK_ERRLOG("Cannot open bdev from non-SPDK thread.\n");
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
2017-06-29 18:23:50 +00:00
|
|
|
|
2018-04-06 17:53:52 +00:00
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Opening descriptor %p for bdev %s on thread %p\n", desc, bdev->name,
|
|
|
|
spdk_get_thread());
|
|
|
|
|
2018-12-29 13:16:17 +00:00
|
|
|
desc->bdev = bdev;
|
|
|
|
desc->thread = thread;
|
|
|
|
desc->write = write;
|
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_lock(&bdev->internal.mutex);
|
2019-07-25 08:03:23 +00:00
|
|
|
if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING) {
|
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
2017-01-10 16:54:23 +00:00
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
if (write && bdev->internal.claim_module) {
|
2018-07-26 05:34:52 +00:00
|
|
|
SPDK_ERRLOG("Could not open %s - %s module already claimed it\n",
|
|
|
|
bdev->name, bdev->internal.claim_module->name);
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
2017-06-29 18:23:50 +00:00
|
|
|
return -EPERM;
|
2017-01-10 16:54:23 +00:00
|
|
|
}
|
|
|
|
|
2019-02-01 19:21:03 +00:00
|
|
|
/* Enable QoS */
|
|
|
|
if (bdev->internal.qos && bdev->internal.qos->thread == NULL) {
|
|
|
|
ctx = calloc(1, sizeof(*ctx));
|
|
|
|
if (ctx == NULL) {
|
|
|
|
SPDK_ERRLOG("Failed to allocate memory for QoS context\n");
|
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
ctx->bdev = bdev;
|
|
|
|
spdk_for_each_channel(__bdev_to_io_dev(bdev),
|
|
|
|
_spdk_bdev_enable_qos_msg, ctx,
|
|
|
|
_spdk_bdev_enable_qos_done);
|
|
|
|
}
|
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
TAILQ_INSERT_TAIL(&bdev->internal.open_descs, desc, link);
|
2017-06-29 18:23:50 +00:00
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
2017-01-10 16:54:23 +00:00
|
|
|
|
2017-06-29 18:23:50 +00:00
|
|
|
return 0;
|
2017-01-10 16:54:23 +00:00
|
|
|
}
|
|
|
|
|
2019-07-25 07:42:19 +00:00
|
|
|
int
|
|
|
|
spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_cb,
|
|
|
|
void *remove_ctx, struct spdk_bdev_desc **_desc)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_desc *desc;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
desc = calloc(1, sizeof(*desc));
|
|
|
|
if (desc == NULL) {
|
|
|
|
SPDK_ERRLOG("Failed to allocate memory for bdev descriptor\n");
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (remove_cb == NULL) {
|
|
|
|
remove_cb = _spdk_bdev_dummy_event_cb;
|
|
|
|
}
|
|
|
|
|
|
|
|
desc->remove_cb = remove_cb;
|
|
|
|
desc->remove_ctx = remove_ctx;
|
|
|
|
|
|
|
|
rc = _spdk_bdev_open(bdev, write, desc);
|
|
|
|
if (rc != 0) {
|
|
|
|
free(desc);
|
|
|
|
desc = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
*_desc = desc;
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2017-01-10 16:54:23 +00:00
|
|
|
void
|
2017-06-29 18:23:50 +00:00
|
|
|
spdk_bdev_close(struct spdk_bdev_desc *desc)
|
2017-01-10 16:54:23 +00:00
|
|
|
{
|
2019-06-05 08:57:19 +00:00
|
|
|
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
|
2019-02-22 13:28:43 +00:00
|
|
|
int rc;
|
2017-01-10 16:54:23 +00:00
|
|
|
|
2018-04-06 17:53:52 +00:00
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Closing descriptor %p for bdev %s on thread %p\n", desc, bdev->name,
|
|
|
|
spdk_get_thread());
|
|
|
|
|
2019-06-24 15:43:39 +00:00
|
|
|
assert(desc->thread == spdk_get_thread());
|
2018-09-05 23:14:47 +00:00
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_lock(&bdev->internal.mutex);
|
2017-06-29 18:23:50 +00:00
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
TAILQ_REMOVE(&bdev->internal.open_descs, desc, link);
|
2018-08-24 07:46:11 +00:00
|
|
|
|
|
|
|
desc->closed = true;
|
|
|
|
|
|
|
|
if (!desc->remove_scheduled) {
|
|
|
|
free(desc);
|
|
|
|
}
|
2017-06-29 18:23:50 +00:00
|
|
|
|
2018-04-06 17:53:52 +00:00
|
|
|
/* If no more descriptors, kill QoS channel */
|
2018-06-21 20:03:02 +00:00
|
|
|
if (bdev->internal.qos && TAILQ_EMPTY(&bdev->internal.open_descs)) {
|
2018-04-06 17:53:52 +00:00
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Closed last descriptor for bdev %s on thread %p. Stopping QoS.\n",
|
|
|
|
bdev->name, spdk_get_thread());
|
|
|
|
|
|
|
|
if (spdk_bdev_qos_destroy(bdev)) {
|
|
|
|
/* There isn't anything we can do to recover here. Just let the
|
|
|
|
* old QoS poller keep running. The QoS handling won't change
|
|
|
|
* cores when the user allocates a new channel, but it won't break. */
|
|
|
|
SPDK_ERRLOG("Unable to shut down QoS poller. It will continue running on the current thread.\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-07-05 17:44:30 +00:00
|
|
|
spdk_bdev_set_qd_sampling_period(bdev, 0);
|
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING && TAILQ_EMPTY(&bdev->internal.open_descs)) {
|
2019-02-22 13:28:43 +00:00
|
|
|
rc = spdk_bdev_unregister_unsafe(bdev);
|
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
2017-01-25 01:04:14 +00:00
|
|
|
|
2019-02-22 13:28:43 +00:00
|
|
|
if (rc == 0) {
|
|
|
|
spdk_bdev_fini(bdev);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
2017-01-25 01:04:14 +00:00
|
|
|
}
|
2017-01-10 16:54:23 +00:00
|
|
|
}
|
|
|
|
|
2017-07-07 23:04:52 +00:00
|
|
|
int
|
2017-07-13 04:06:22 +00:00
|
|
|
spdk_bdev_module_claim_bdev(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
|
2018-03-09 22:20:21 +00:00
|
|
|
struct spdk_bdev_module *module)
|
2017-07-07 23:04:52 +00:00
|
|
|
{
|
2018-06-21 20:03:02 +00:00
|
|
|
if (bdev->internal.claim_module != NULL) {
|
2017-07-07 23:04:52 +00:00
|
|
|
SPDK_ERRLOG("bdev %s already claimed by module %s\n", bdev->name,
|
2018-06-21 20:03:02 +00:00
|
|
|
bdev->internal.claim_module->name);
|
2017-07-07 23:04:52 +00:00
|
|
|
return -EPERM;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (desc && !desc->write) {
|
|
|
|
desc->write = true;
|
|
|
|
}
|
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
bdev->internal.claim_module = module;
|
2017-07-07 23:04:52 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2017-07-13 04:06:22 +00:00
|
|
|
spdk_bdev_module_release_bdev(struct spdk_bdev *bdev)
|
2017-07-07 23:04:52 +00:00
|
|
|
{
|
2018-06-21 20:03:02 +00:00
|
|
|
assert(bdev->internal.claim_module != NULL);
|
|
|
|
bdev->internal.claim_module = NULL;
|
2017-07-07 23:04:52 +00:00
|
|
|
}
|
|
|
|
|
2017-07-06 19:39:19 +00:00
|
|
|
struct spdk_bdev *
|
|
|
|
spdk_bdev_desc_get_bdev(struct spdk_bdev_desc *desc)
|
|
|
|
{
|
2019-07-08 16:12:30 +00:00
|
|
|
assert(desc != NULL);
|
2017-07-06 19:39:19 +00:00
|
|
|
return desc->bdev;
|
|
|
|
}
|
|
|
|
|
2017-05-04 20:57:07 +00:00
|
|
|
void
|
|
|
|
spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *iovcntp)
|
|
|
|
{
|
|
|
|
struct iovec *iovs;
|
|
|
|
int iovcnt;
|
|
|
|
|
|
|
|
if (bdev_io == NULL) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (bdev_io->type) {
|
|
|
|
case SPDK_BDEV_IO_TYPE_READ:
|
|
|
|
case SPDK_BDEV_IO_TYPE_WRITE:
|
2017-11-07 22:05:19 +00:00
|
|
|
case SPDK_BDEV_IO_TYPE_ZCOPY:
|
2017-09-20 13:10:17 +00:00
|
|
|
iovs = bdev_io->u.bdev.iovs;
|
|
|
|
iovcnt = bdev_io->u.bdev.iovcnt;
|
2017-05-04 20:57:07 +00:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
iovs = NULL;
|
|
|
|
iovcnt = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (iovp) {
|
|
|
|
*iovp = iovs;
|
|
|
|
}
|
|
|
|
if (iovcntp) {
|
|
|
|
*iovcntp = iovcnt;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-04-16 08:12:09 +00:00
|
|
|
void *
|
|
|
|
spdk_bdev_io_get_md_buf(struct spdk_bdev_io *bdev_io)
|
|
|
|
{
|
|
|
|
if (bdev_io == NULL) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!spdk_bdev_is_md_separate(bdev_io->bdev)) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ ||
|
|
|
|
bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
|
|
|
|
return bdev_io->u.bdev.md_buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2017-05-09 21:01:12 +00:00
|
|
|
void
|
2018-03-09 22:20:21 +00:00
|
|
|
spdk_bdev_module_list_add(struct spdk_bdev_module *bdev_module)
|
2016-07-20 18:16:23 +00:00
|
|
|
{
|
2018-03-06 18:52:46 +00:00
|
|
|
|
|
|
|
if (spdk_bdev_module_list_find(bdev_module->name)) {
|
2018-05-03 18:36:36 +00:00
|
|
|
SPDK_ERRLOG("ERROR: module '%s' already registered.\n", bdev_module->name);
|
2018-03-06 18:52:46 +00:00
|
|
|
assert(false);
|
|
|
|
}
|
|
|
|
|
2017-07-13 04:06:22 +00:00
|
|
|
/*
|
|
|
|
* Modules with examine callbacks must be initialized first, so they are
|
|
|
|
* ready to handle examine callbacks from later modules that will
|
|
|
|
* register physical bdevs.
|
|
|
|
*/
|
2018-06-26 12:07:11 +00:00
|
|
|
if (bdev_module->examine_config != NULL || bdev_module->examine_disk != NULL) {
|
2018-06-21 20:32:40 +00:00
|
|
|
TAILQ_INSERT_HEAD(&g_bdev_mgr.bdev_modules, bdev_module, internal.tailq);
|
2017-07-13 04:06:22 +00:00
|
|
|
} else {
|
2018-06-21 20:32:40 +00:00
|
|
|
TAILQ_INSERT_TAIL(&g_bdev_mgr.bdev_modules, bdev_module, internal.tailq);
|
2017-07-13 04:06:22 +00:00
|
|
|
}
|
2016-07-20 18:16:23 +00:00
|
|
|
}
|
2017-08-25 21:22:46 +00:00
|
|
|
|
2018-03-09 22:20:21 +00:00
|
|
|
struct spdk_bdev_module *
|
2018-03-06 18:52:46 +00:00
|
|
|
spdk_bdev_module_list_find(const char *name)
|
|
|
|
{
|
2018-03-09 22:20:21 +00:00
|
|
|
struct spdk_bdev_module *bdev_module;
|
2018-03-06 18:52:46 +00:00
|
|
|
|
2018-06-21 20:32:40 +00:00
|
|
|
TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, internal.tailq) {
|
2018-03-06 18:52:46 +00:00
|
|
|
if (strcmp(name, bdev_module->name) == 0) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return bdev_module;
|
|
|
|
}
|
|
|
|
|
2017-07-28 22:34:24 +00:00
|
|
|
static void
|
2018-08-23 22:08:17 +00:00
|
|
|
_spdk_bdev_write_zero_buffer_next(void *_bdev_io)
|
2017-07-28 22:34:24 +00:00
|
|
|
{
|
2018-08-23 22:08:17 +00:00
|
|
|
struct spdk_bdev_io *bdev_io = _bdev_io;
|
|
|
|
uint64_t num_bytes, num_blocks;
|
2019-06-11 12:31:15 +00:00
|
|
|
void *md_buf = NULL;
|
2018-08-23 22:08:17 +00:00
|
|
|
int rc;
|
2017-07-28 22:34:24 +00:00
|
|
|
|
2019-06-11 12:31:15 +00:00
|
|
|
num_bytes = spdk_min(_bdev_get_block_size_with_md(bdev_io->bdev) *
|
2018-08-23 22:08:17 +00:00
|
|
|
bdev_io->u.bdev.split_remaining_num_blocks,
|
|
|
|
ZERO_BUFFER_SIZE);
|
2019-06-11 12:31:15 +00:00
|
|
|
num_blocks = num_bytes / _bdev_get_block_size_with_md(bdev_io->bdev);
|
|
|
|
|
|
|
|
if (spdk_bdev_is_md_separate(bdev_io->bdev)) {
|
|
|
|
md_buf = (char *)g_bdev_mgr.zero_buffer +
|
|
|
|
spdk_bdev_get_block_size(bdev_io->bdev) * num_blocks;
|
|
|
|
}
|
2018-08-23 22:08:17 +00:00
|
|
|
|
2019-06-11 12:31:15 +00:00
|
|
|
rc = _spdk_bdev_write_blocks_with_md(bdev_io->internal.desc,
|
|
|
|
spdk_io_channel_from_ctx(bdev_io->internal.ch),
|
|
|
|
g_bdev_mgr.zero_buffer, md_buf,
|
|
|
|
bdev_io->u.bdev.split_current_offset_blocks, num_blocks,
|
|
|
|
_spdk_bdev_write_zero_buffer_done, bdev_io);
|
2018-08-23 22:08:17 +00:00
|
|
|
if (rc == 0) {
|
|
|
|
bdev_io->u.bdev.split_remaining_num_blocks -= num_blocks;
|
|
|
|
bdev_io->u.bdev.split_current_offset_blocks += num_blocks;
|
|
|
|
} else if (rc == -ENOMEM) {
|
2018-09-28 03:57:26 +00:00
|
|
|
_spdk_bdev_queue_io_wait_with_cb(bdev_io, _spdk_bdev_write_zero_buffer_next);
|
2018-08-23 22:08:17 +00:00
|
|
|
} else {
|
|
|
|
bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
|
2018-09-25 05:51:38 +00:00
|
|
|
bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
|
2017-07-28 22:34:24 +00:00
|
|
|
}
|
2018-08-23 22:08:17 +00:00
|
|
|
}
|
2017-07-28 22:34:24 +00:00
|
|
|
|
2018-08-23 22:08:17 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_write_zero_buffer_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_io *parent_io = cb_arg;
|
2017-07-28 22:34:24 +00:00
|
|
|
|
2018-09-05 22:07:38 +00:00
|
|
|
spdk_bdev_free_io(bdev_io);
|
|
|
|
|
2018-08-23 22:08:17 +00:00
|
|
|
if (!success) {
|
|
|
|
parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
|
2018-09-25 05:51:38 +00:00
|
|
|
parent_io->internal.cb(parent_io, false, parent_io->internal.caller_ctx);
|
2018-08-23 22:08:17 +00:00
|
|
|
return;
|
|
|
|
}
|
2017-07-28 22:34:24 +00:00
|
|
|
|
2018-08-23 22:08:17 +00:00
|
|
|
if (parent_io->u.bdev.split_remaining_num_blocks == 0) {
|
|
|
|
parent_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
|
2018-09-25 05:51:38 +00:00
|
|
|
parent_io->internal.cb(parent_io, true, parent_io->internal.caller_ctx);
|
2018-08-23 22:08:17 +00:00
|
|
|
return;
|
2017-07-28 22:34:24 +00:00
|
|
|
}
|
2018-08-23 22:08:17 +00:00
|
|
|
|
|
|
|
_spdk_bdev_write_zero_buffer_next(parent_io);
|
2017-07-28 22:34:24 +00:00
|
|
|
}
|
|
|
|
|
2017-12-29 08:02:08 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_set_qos_limit_done(struct set_qos_limit_ctx *ctx, int status)
|
|
|
|
{
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_lock(&ctx->bdev->internal.mutex);
|
|
|
|
ctx->bdev->internal.qos_mod_in_progress = false;
|
|
|
|
pthread_mutex_unlock(&ctx->bdev->internal.mutex);
|
2018-04-20 20:55:51 +00:00
|
|
|
|
2019-02-01 19:21:03 +00:00
|
|
|
if (ctx->cb_fn) {
|
|
|
|
ctx->cb_fn(ctx->cb_arg, status);
|
|
|
|
}
|
2017-12-29 08:02:08 +00:00
|
|
|
free(ctx);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2018-04-20 20:55:51 +00:00
|
|
|
_spdk_bdev_disable_qos_done(void *cb_arg)
|
2017-12-29 08:02:08 +00:00
|
|
|
{
|
|
|
|
struct set_qos_limit_ctx *ctx = cb_arg;
|
|
|
|
struct spdk_bdev *bdev = ctx->bdev;
|
2018-07-05 15:57:33 +00:00
|
|
|
struct spdk_bdev_io *bdev_io;
|
2018-04-20 20:55:51 +00:00
|
|
|
struct spdk_bdev_qos *qos;
|
2017-12-29 08:02:08 +00:00
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_lock(&bdev->internal.mutex);
|
|
|
|
qos = bdev->internal.qos;
|
|
|
|
bdev->internal.qos = NULL;
|
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
2018-04-20 20:55:51 +00:00
|
|
|
|
2018-07-05 15:57:33 +00:00
|
|
|
while (!TAILQ_EMPTY(&qos->queued)) {
|
|
|
|
/* Send queued I/O back to their original thread for resubmission. */
|
|
|
|
bdev_io = TAILQ_FIRST(&qos->queued);
|
|
|
|
TAILQ_REMOVE(&qos->queued, bdev_io, internal.link);
|
|
|
|
|
|
|
|
if (bdev_io->internal.io_submit_ch) {
|
|
|
|
/*
|
|
|
|
* Channel was changed when sending it to the QoS thread - change it back
|
|
|
|
* before sending it back to the original thread.
|
|
|
|
*/
|
|
|
|
bdev_io->internal.ch = bdev_io->internal.io_submit_ch;
|
|
|
|
bdev_io->internal.io_submit_ch = NULL;
|
|
|
|
}
|
|
|
|
|
2019-05-31 05:49:38 +00:00
|
|
|
spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io),
|
2018-07-05 15:57:33 +00:00
|
|
|
_spdk_bdev_io_submit, bdev_io);
|
|
|
|
}
|
|
|
|
|
2018-12-18 03:24:16 +00:00
|
|
|
if (qos->thread != NULL) {
|
|
|
|
spdk_put_io_channel(spdk_io_channel_from_ctx(qos->ch));
|
|
|
|
spdk_poller_unregister(&qos->poller);
|
|
|
|
}
|
2018-04-24 22:44:14 +00:00
|
|
|
|
|
|
|
free(qos);
|
2018-04-20 20:55:51 +00:00
|
|
|
|
|
|
|
_spdk_bdev_set_qos_limit_done(ctx, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_spdk_bdev_disable_qos_msg_done(struct spdk_io_channel_iter *i, int status)
|
|
|
|
{
|
|
|
|
void *io_device = spdk_io_channel_iter_get_io_device(i);
|
|
|
|
struct spdk_bdev *bdev = __bdev_from_io_dev(io_device);
|
|
|
|
struct set_qos_limit_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
|
|
|
|
struct spdk_thread *thread;
|
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_lock(&bdev->internal.mutex);
|
|
|
|
thread = bdev->internal.qos->thread;
|
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
2018-04-20 20:55:51 +00:00
|
|
|
|
2018-12-18 03:24:16 +00:00
|
|
|
if (thread != NULL) {
|
|
|
|
spdk_thread_send_msg(thread, _spdk_bdev_disable_qos_done, ctx);
|
|
|
|
} else {
|
|
|
|
_spdk_bdev_disable_qos_done(ctx);
|
|
|
|
}
|
2018-04-20 20:55:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_spdk_bdev_disable_qos_msg(struct spdk_io_channel_iter *i)
|
|
|
|
{
|
|
|
|
struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
|
|
|
|
struct spdk_bdev_channel *bdev_ch = spdk_io_channel_get_ctx(ch);
|
|
|
|
|
|
|
|
bdev_ch->flags &= ~BDEV_CH_QOS_ENABLED;
|
|
|
|
|
|
|
|
spdk_for_each_channel_continue(i, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2018-09-04 15:01:51 +00:00
|
|
|
_spdk_bdev_update_qos_rate_limit_msg(void *cb_arg)
|
2018-04-20 20:55:51 +00:00
|
|
|
{
|
|
|
|
struct set_qos_limit_ctx *ctx = cb_arg;
|
|
|
|
struct spdk_bdev *bdev = ctx->bdev;
|
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_lock(&bdev->internal.mutex);
|
|
|
|
spdk_bdev_qos_update_max_quota_per_timeslice(bdev->internal.qos);
|
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
2017-12-29 08:02:08 +00:00
|
|
|
|
|
|
|
_spdk_bdev_set_qos_limit_done(ctx, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_spdk_bdev_enable_qos_msg(struct spdk_io_channel_iter *i)
|
|
|
|
{
|
|
|
|
void *io_device = spdk_io_channel_iter_get_io_device(i);
|
|
|
|
struct spdk_bdev *bdev = __bdev_from_io_dev(io_device);
|
|
|
|
struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
|
|
|
|
struct spdk_bdev_channel *bdev_ch = spdk_io_channel_get_ctx(ch);
|
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_lock(&bdev->internal.mutex);
|
2018-07-12 06:35:05 +00:00
|
|
|
_spdk_bdev_enable_qos(bdev, bdev_ch);
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
2018-07-12 06:35:05 +00:00
|
|
|
spdk_for_each_channel_continue(i, 0);
|
2017-12-29 08:02:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Completion of the per-channel "enable QoS" iteration: forward the
 * iteration status to the common QoS completion helper.
 */
static void
_spdk_bdev_enable_qos_done(struct spdk_io_channel_iter *i, int status)
{
	struct set_qos_limit_ctx *qos_ctx;

	qos_ctx = spdk_io_channel_iter_get_ctx(i);
	_spdk_bdev_set_qos_limit_done(qos_ctx, status);
}
|
|
|
|
|
2018-09-04 15:01:51 +00:00
|
|
|
static void
|
|
|
|
_spdk_bdev_set_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
assert(bdev->internal.qos != NULL);
|
|
|
|
|
|
|
|
for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
|
|
|
|
if (limits[i] != SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
|
|
|
|
bdev->internal.qos->rate_limits[i].limit = limits[i];
|
|
|
|
|
|
|
|
if (limits[i] == 0) {
|
|
|
|
bdev->internal.qos->rate_limits[i].limit =
|
|
|
|
SPDK_BDEV_QOS_LIMIT_NOT_DEFINED;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-12-29 08:02:08 +00:00
|
|
|
void
|
2018-09-04 15:01:51 +00:00
|
|
|
spdk_bdev_set_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits,
|
|
|
|
void (*cb_fn)(void *cb_arg, int status), void *cb_arg)
|
2017-12-29 08:02:08 +00:00
|
|
|
{
|
2018-09-04 15:01:51 +00:00
|
|
|
struct set_qos_limit_ctx *ctx;
|
|
|
|
uint32_t limit_set_complement;
|
|
|
|
uint64_t min_limit_per_sec;
|
|
|
|
int i;
|
|
|
|
bool disable_rate_limit = true;
|
2017-12-29 08:02:08 +00:00
|
|
|
|
2018-09-04 15:01:51 +00:00
|
|
|
for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
|
|
|
|
if (limits[i] == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (limits[i] > 0) {
|
|
|
|
disable_rate_limit = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (_spdk_bdev_qos_is_iops_rate_limit(i) == true) {
|
|
|
|
min_limit_per_sec = SPDK_BDEV_QOS_MIN_IOS_PER_SEC;
|
|
|
|
} else {
|
|
|
|
/* Change from megabyte to byte rate limit */
|
|
|
|
limits[i] = limits[i] * 1024 * 1024;
|
|
|
|
min_limit_per_sec = SPDK_BDEV_QOS_MIN_BYTES_PER_SEC;
|
|
|
|
}
|
|
|
|
|
|
|
|
limit_set_complement = limits[i] % min_limit_per_sec;
|
|
|
|
if (limit_set_complement) {
|
|
|
|
SPDK_ERRLOG("Requested rate limit %" PRIu64 " is not a multiple of %" PRIu64 "\n",
|
|
|
|
limits[i], min_limit_per_sec);
|
|
|
|
limits[i] += min_limit_per_sec - limit_set_complement;
|
|
|
|
SPDK_ERRLOG("Round up the rate limit to %" PRIu64 "\n", limits[i]);
|
|
|
|
}
|
2017-12-29 08:02:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
ctx = calloc(1, sizeof(*ctx));
|
|
|
|
if (ctx == NULL) {
|
|
|
|
cb_fn(cb_arg, -ENOMEM);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
ctx->cb_fn = cb_fn;
|
|
|
|
ctx->cb_arg = cb_arg;
|
2018-04-20 20:55:51 +00:00
|
|
|
ctx->bdev = bdev;
|
2017-12-29 08:02:08 +00:00
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_lock(&bdev->internal.mutex);
|
|
|
|
if (bdev->internal.qos_mod_in_progress) {
|
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
2018-04-20 20:55:51 +00:00
|
|
|
free(ctx);
|
|
|
|
cb_fn(cb_arg, -EAGAIN);
|
2017-12-29 08:02:08 +00:00
|
|
|
return;
|
|
|
|
}
|
2018-06-21 20:03:02 +00:00
|
|
|
bdev->internal.qos_mod_in_progress = true;
|
2018-04-24 22:44:14 +00:00
|
|
|
|
2018-09-04 15:01:51 +00:00
|
|
|
if (disable_rate_limit == true && bdev->internal.qos) {
|
|
|
|
for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
|
|
|
|
if (limits[i] == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED &&
|
|
|
|
(bdev->internal.qos->rate_limits[i].limit > 0 &&
|
|
|
|
bdev->internal.qos->rate_limits[i].limit !=
|
|
|
|
SPDK_BDEV_QOS_LIMIT_NOT_DEFINED)) {
|
|
|
|
disable_rate_limit = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (disable_rate_limit == false) {
|
2018-06-21 20:03:02 +00:00
|
|
|
if (bdev->internal.qos == NULL) {
|
|
|
|
bdev->internal.qos = calloc(1, sizeof(*bdev->internal.qos));
|
|
|
|
if (!bdev->internal.qos) {
|
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
2018-04-24 22:44:14 +00:00
|
|
|
SPDK_ERRLOG("Unable to allocate memory for QoS tracking\n");
|
2019-08-19 11:59:50 +00:00
|
|
|
_spdk_bdev_set_qos_limit_done(ctx, -ENOMEM);
|
2018-04-24 22:44:14 +00:00
|
|
|
return;
|
|
|
|
}
|
2018-12-18 03:24:16 +00:00
|
|
|
}
|
2017-12-29 08:02:08 +00:00
|
|
|
|
2018-12-18 03:24:16 +00:00
|
|
|
if (bdev->internal.qos->thread == NULL) {
|
|
|
|
/* Enabling */
|
2018-09-04 15:01:51 +00:00
|
|
|
_spdk_bdev_set_qos_rate_limits(bdev, limits);
|
|
|
|
|
2018-04-24 22:44:14 +00:00
|
|
|
spdk_for_each_channel(__bdev_to_io_dev(bdev),
|
|
|
|
_spdk_bdev_enable_qos_msg, ctx,
|
|
|
|
_spdk_bdev_enable_qos_done);
|
|
|
|
} else {
|
|
|
|
/* Updating */
|
2018-09-04 15:01:51 +00:00
|
|
|
_spdk_bdev_set_qos_rate_limits(bdev, limits);
|
|
|
|
|
|
|
|
spdk_thread_send_msg(bdev->internal.qos->thread,
|
|
|
|
_spdk_bdev_update_qos_rate_limit_msg, ctx);
|
2018-04-24 22:44:14 +00:00
|
|
|
}
|
|
|
|
} else {
|
2018-06-21 20:03:02 +00:00
|
|
|
if (bdev->internal.qos != NULL) {
|
2018-09-04 15:01:51 +00:00
|
|
|
_spdk_bdev_set_qos_rate_limits(bdev, limits);
|
|
|
|
|
2018-04-20 20:55:51 +00:00
|
|
|
/* Disabling */
|
|
|
|
spdk_for_each_channel(__bdev_to_io_dev(bdev),
|
|
|
|
_spdk_bdev_disable_qos_msg, ctx,
|
|
|
|
_spdk_bdev_disable_qos_msg_done);
|
|
|
|
} else {
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
2018-04-24 22:44:14 +00:00
|
|
|
_spdk_bdev_set_qos_limit_done(ctx, 0);
|
|
|
|
return;
|
2018-04-20 20:55:51 +00:00
|
|
|
}
|
|
|
|
}
|
2018-04-24 22:44:14 +00:00
|
|
|
|
2018-06-21 20:03:02 +00:00
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
2017-12-29 08:02:08 +00:00
|
|
|
}
|
|
|
|
|
2018-11-19 13:31:19 +00:00
|
|
|
struct spdk_bdev_histogram_ctx {
|
|
|
|
spdk_bdev_histogram_status_cb cb_fn;
|
|
|
|
void *cb_arg;
|
|
|
|
struct spdk_bdev *bdev;
|
|
|
|
int status;
|
|
|
|
};
|
|
|
|
|
|
|
|
static void
|
|
|
|
_spdk_bdev_histogram_disable_channel_cb(struct spdk_io_channel_iter *i, int status)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_histogram_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
|
|
|
|
|
|
|
|
pthread_mutex_lock(&ctx->bdev->internal.mutex);
|
|
|
|
ctx->bdev->internal.histogram_in_progress = false;
|
|
|
|
pthread_mutex_unlock(&ctx->bdev->internal.mutex);
|
|
|
|
ctx->cb_fn(ctx->cb_arg, ctx->status);
|
|
|
|
free(ctx);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_spdk_bdev_histogram_disable_channel(struct spdk_io_channel_iter *i)
|
|
|
|
{
|
|
|
|
struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
|
|
|
|
struct spdk_bdev_channel *ch = spdk_io_channel_get_ctx(_ch);
|
|
|
|
|
|
|
|
if (ch->histogram != NULL) {
|
|
|
|
spdk_histogram_data_free(ch->histogram);
|
|
|
|
ch->histogram = NULL;
|
|
|
|
}
|
|
|
|
spdk_for_each_channel_continue(i, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_spdk_bdev_histogram_enable_channel_cb(struct spdk_io_channel_iter *i, int status)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_histogram_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
|
|
|
|
|
|
|
|
if (status != 0) {
|
|
|
|
ctx->status = status;
|
|
|
|
ctx->bdev->internal.histogram_enabled = false;
|
|
|
|
spdk_for_each_channel(__bdev_to_io_dev(ctx->bdev), _spdk_bdev_histogram_disable_channel, ctx,
|
|
|
|
_spdk_bdev_histogram_disable_channel_cb);
|
|
|
|
} else {
|
|
|
|
pthread_mutex_lock(&ctx->bdev->internal.mutex);
|
|
|
|
ctx->bdev->internal.histogram_in_progress = false;
|
|
|
|
pthread_mutex_unlock(&ctx->bdev->internal.mutex);
|
|
|
|
ctx->cb_fn(ctx->cb_arg, ctx->status);
|
|
|
|
free(ctx);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_spdk_bdev_histogram_enable_channel(struct spdk_io_channel_iter *i)
|
|
|
|
{
|
|
|
|
struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
|
|
|
|
struct spdk_bdev_channel *ch = spdk_io_channel_get_ctx(_ch);
|
|
|
|
int status = 0;
|
|
|
|
|
|
|
|
if (ch->histogram == NULL) {
|
|
|
|
ch->histogram = spdk_histogram_data_alloc();
|
|
|
|
if (ch->histogram == NULL) {
|
|
|
|
status = -ENOMEM;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
spdk_for_each_channel_continue(i, status);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
spdk_bdev_histogram_enable(struct spdk_bdev *bdev, spdk_bdev_histogram_status_cb cb_fn,
|
|
|
|
void *cb_arg, bool enable)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_histogram_ctx *ctx;
|
|
|
|
|
|
|
|
ctx = calloc(1, sizeof(struct spdk_bdev_histogram_ctx));
|
|
|
|
if (ctx == NULL) {
|
|
|
|
cb_fn(cb_arg, -ENOMEM);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
ctx->bdev = bdev;
|
|
|
|
ctx->status = 0;
|
|
|
|
ctx->cb_fn = cb_fn;
|
|
|
|
ctx->cb_arg = cb_arg;
|
|
|
|
|
|
|
|
pthread_mutex_lock(&bdev->internal.mutex);
|
|
|
|
if (bdev->internal.histogram_in_progress) {
|
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
|
|
|
free(ctx);
|
|
|
|
cb_fn(cb_arg, -EAGAIN);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
bdev->internal.histogram_in_progress = true;
|
|
|
|
pthread_mutex_unlock(&bdev->internal.mutex);
|
|
|
|
|
|
|
|
bdev->internal.histogram_enabled = enable;
|
|
|
|
|
|
|
|
if (enable) {
|
|
|
|
/* Allocate histogram for each channel */
|
|
|
|
spdk_for_each_channel(__bdev_to_io_dev(bdev), _spdk_bdev_histogram_enable_channel, ctx,
|
|
|
|
_spdk_bdev_histogram_enable_channel_cb);
|
|
|
|
} else {
|
|
|
|
spdk_for_each_channel(__bdev_to_io_dev(bdev), _spdk_bdev_histogram_disable_channel, ctx,
|
|
|
|
_spdk_bdev_histogram_disable_channel_cb);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
struct spdk_bdev_histogram_data_ctx {
|
|
|
|
spdk_bdev_histogram_data_cb cb_fn;
|
|
|
|
void *cb_arg;
|
|
|
|
struct spdk_bdev *bdev;
|
|
|
|
/** merged histogram data from all channels */
|
|
|
|
struct spdk_histogram_data *histogram;
|
|
|
|
};
|
|
|
|
|
|
|
|
static void
|
|
|
|
_spdk_bdev_histogram_get_channel_cb(struct spdk_io_channel_iter *i, int status)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_histogram_data_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
|
|
|
|
|
|
|
|
ctx->cb_fn(ctx->cb_arg, status, ctx->histogram);
|
|
|
|
free(ctx);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_spdk_bdev_histogram_get_channel(struct spdk_io_channel_iter *i)
|
|
|
|
{
|
|
|
|
struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
|
|
|
|
struct spdk_bdev_channel *ch = spdk_io_channel_get_ctx(_ch);
|
|
|
|
struct spdk_bdev_histogram_data_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
|
|
|
|
int status = 0;
|
|
|
|
|
|
|
|
if (ch->histogram == NULL) {
|
|
|
|
status = -EFAULT;
|
|
|
|
} else {
|
|
|
|
spdk_histogram_data_merge(ctx->histogram, ch->histogram);
|
|
|
|
}
|
|
|
|
|
|
|
|
spdk_for_each_channel_continue(i, status);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
spdk_bdev_histogram_get(struct spdk_bdev *bdev, struct spdk_histogram_data *histogram,
|
|
|
|
spdk_bdev_histogram_data_cb cb_fn,
|
|
|
|
void *cb_arg)
|
|
|
|
{
|
|
|
|
struct spdk_bdev_histogram_data_ctx *ctx;
|
|
|
|
|
|
|
|
ctx = calloc(1, sizeof(struct spdk_bdev_histogram_data_ctx));
|
|
|
|
if (ctx == NULL) {
|
|
|
|
cb_fn(cb_arg, -ENOMEM, NULL);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
ctx->bdev = bdev;
|
|
|
|
ctx->cb_fn = cb_fn;
|
|
|
|
ctx->cb_arg = cb_arg;
|
|
|
|
|
|
|
|
ctx->histogram = histogram;
|
|
|
|
|
|
|
|
spdk_for_each_channel(__bdev_to_io_dev(bdev), _spdk_bdev_histogram_get_channel, ctx,
|
|
|
|
_spdk_bdev_histogram_get_channel_cb);
|
|
|
|
}
|
|
|
|
|
2017-08-30 18:06:33 +00:00
|
|
|
/* Log component registration for this file: name "bdev", flag SPDK_LOG_BDEV (macro defined outside this file chunk). */
SPDK_LOG_REGISTER_COMPONENT("bdev", SPDK_LOG_BDEV)
|
2018-08-31 20:13:32 +00:00
|
|
|
|
2018-11-29 08:11:57 +00:00
|
|
|
/*
 * Trace registration for the "bdev" trace group: declares the bdev owner
 * and bdev I/O object types, then the I/O start and I/O done tracepoints.
 */
SPDK_TRACE_REGISTER_FN(bdev_trace, "bdev", TRACE_GROUP_BDEV)
{
	/* Owner and object identifiers used by the tracepoints below. */
	spdk_trace_register_owner(OWNER_BDEV, 'b');
	spdk_trace_register_object(OBJECT_BDEV_IO, 'i');

	/* Tracepoint descriptions for bdev I/O start and completion. */
	spdk_trace_register_description("BDEV_IO_START", TRACE_BDEV_IO_START,
					OWNER_BDEV, OBJECT_BDEV_IO, 1, 0, "type: ");
	spdk_trace_register_description("BDEV_IO_DONE", TRACE_BDEV_IO_DONE,
					OWNER_BDEV, OBJECT_BDEV_IO, 0, 0, "");
}
|