bdev: Add extended versions of readv/writev

New functions accept extendable structure of IO options

Change-Id: If6864df151a3c0ad722785cb26d1f5d4309cd733
Signed-off-by: Alexey Marchuk <alexeymar@mellanox.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/6269
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Ziye Yang <ziye.yang@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
This commit is contained in:
Alexey Marchuk 2021-05-31 18:43:43 +03:00 committed by Tomasz Zawadzki
parent c3a5848966
commit ac6f2bdd8d
6 changed files with 235 additions and 13 deletions

View File

@ -6,6 +6,11 @@
New API `spdk_bdev_get_memory_domains` has been added; it allows getting the SPDK memory domains used by a bdev.
New API functions `spdk_bdev_readv_blocks_ext` and `spdk_bdev_writev_blocks_ext` have been added.
These functions accept a `spdk_bdev_ext_io_opts` structure with extended IO request
options, e.g. a DMA memory domain which describes data that may belong to another memory domain and
can't be accessed directly.
### dma
A new library, lib/dma, has been added. This library provides the necessary infrastructure for

View File

@ -81,9 +81,6 @@ struct spdk_bdev_media_event {
*/
struct spdk_bdev;
/** Forward declaration of spdk memory domain */
struct spdk_memory_domain;
/**
* Block device remove callback.
*
@ -203,6 +200,24 @@ struct spdk_bdev_opts {
uint32_t large_buf_pool_size;
};
/**
* Structure with optional IO request parameters.
* The content of this structure must be valid until the IO request is completed.
*/
struct spdk_bdev_ext_io_opts {
/** Size of this structure in bytes */
size_t size;
/** Memory domain which describes payload in this IO request. bdev must support DMA device type that
* can access this memory domain, refer to \ref spdk_bdev_get_memory_domains and \ref spdk_memory_domain_get_dma_device_type.
* If set, that means that data buffers can't be accessed directly and the memory domain must
* be used to fetch data to local buffers or to translate data to another memory domain */
struct spdk_memory_domain *memory_domain;
/** Context to be passed to memory domain operations */
void *memory_domain_ctx;
/** Metadata buffer, optional. May be NULL when no separate metadata buffer is used */
void *metadata;
};
/**
* Get the options for the bdev module.
*
@ -900,6 +915,40 @@ int spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_c
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg);
/**
* Submit a read request to the bdev on the given channel. This differs from
* spdk_bdev_read by allowing the data buffer to be described in a scatter
* gather list. Some physical devices place memory alignment requirements on
* data or metadata and may not be able to directly transfer into the buffers
* provided. In this case, the request may fail. A separate buffer for metadata
* transfer may be supplied in opts->metadata (valid only if bdev supports this mode).
*
* \ingroup bdev_io_submit_functions
*
* \param desc Block device descriptor.
* \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
* \param iov A scatter gather list of buffers to be read into.
* \param iovcnt The number of elements in iov.
* \param offset_blocks The offset, in blocks, from the start of the block device.
* \param num_blocks The number of blocks to read.
* \param cb Called when the request is complete.
* \param cb_arg Argument passed to cb.
* \param opts Optional structure with extended IO request options. May be NULL. If set, this
* structure must be valid until the IO is completed.
*
* \return 0 on success. On success, the callback will always
* be called (even if the request ultimately failed). Return
* negated errno on failure, in which case the callback will not be called.
* * -EINVAL - offset_blocks and/or num_blocks are out of range, or opts->metadata is set
* but separate metadata is not supported by the bdev or the metadata buffer is rejected,
* or opts->size is incorrect (NOTE(review): confirm that opts->size is actually validated
* by the implementation)
* * -ENOMEM - spdk_bdev_io buffer cannot be allocated
*/
int spdk_bdev_readv_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, uint64_t offset_blocks,
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
struct spdk_bdev_ext_io_opts *opts);
/**
* Submit a write request to the bdev on the given channel.
*
@ -1069,6 +1118,41 @@ int spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg);
/**
* Submit a write request to the bdev on the given channel. This differs from
* spdk_bdev_write by allowing the data buffer to be described in a scatter
* gather list. Some physical devices place memory alignment requirements on
* data or metadata and may not be able to directly transfer out of the buffers
* provided. In this case, the request may fail. A separate buffer for metadata
* transfer may be supplied in opts->metadata (valid only if bdev supports this mode).
*
* \ingroup bdev_io_submit_functions
*
* \param desc Block device descriptor.
* \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
* \param iov A scatter gather list of buffers to be written from.
* \param iovcnt The number of elements in iov.
* \param offset_blocks The offset, in blocks, from the start of the block device.
* \param num_blocks The number of blocks to write.
* \param cb Called when the request is complete.
* \param cb_arg Argument passed to cb.
* \param opts Optional structure with extended IO request options. May be NULL. If set, this
* structure must be valid until the IO is completed.
*
* \return 0 on success. On success, the callback will always
* be called (even if the request ultimately failed). Return
* negated errno on failure, in which case the callback will not be called.
* * -EINVAL - offset_blocks and/or num_blocks are out of range, or opts->metadata is set
* but separate metadata is not supported by the bdev or the metadata buffer is rejected,
* or opts->size is incorrect (NOTE(review): confirm that opts->size is actually validated
* by the implementation)
* * -ENOMEM - spdk_bdev_io buffer cannot be allocated
* * -EBADF - desc not open for writing
*/
int spdk_bdev_writev_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, uint64_t offset_blocks,
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
struct spdk_bdev_ext_io_opts *opts);
/**
* Submit a compare request to the bdev on the given channel.
*

View File

@ -750,6 +750,9 @@ struct spdk_bdev_io {
/** Enables queuing parent I/O when no bdev_ios available for split children. */
struct spdk_bdev_io_wait_entry waitq_entry;
/** Pointer to a structure passed by the user in ext API */
struct spdk_bdev_ext_io_opts *ext_opts;
} internal;
/**

View File

@ -364,12 +364,14 @@ static void bdev_enable_qos_done(struct spdk_io_channel_iter *i, int status);
static int
bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks,
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg);
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
struct spdk_bdev_ext_io_opts *opts);
static int
bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, void *md_buf,
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg);
spdk_bdev_io_completion_cb cb, void *cb_arg,
struct spdk_bdev_ext_io_opts *opts);
static int
bdev_lock_lba_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *_ch,
@ -2119,14 +2121,16 @@ bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt
spdk_io_channel_from_ctx(bdev_io->internal.ch),
iov, iovcnt, md_buf, current_offset,
num_blocks,
bdev_io_split_done, bdev_io);
bdev_io_split_done, bdev_io,
bdev_io->internal.ext_opts);
break;
case SPDK_BDEV_IO_TYPE_WRITE:
rc = bdev_writev_blocks_with_md(bdev_io->internal.desc,
spdk_io_channel_from_ctx(bdev_io->internal.ch),
iov, iovcnt, md_buf, current_offset,
num_blocks,
bdev_io_split_done, bdev_io);
bdev_io_split_done, bdev_io,
bdev_io->internal.ext_opts);
break;
case SPDK_BDEV_IO_TYPE_UNMAP:
io_wait_fn = _bdev_unmap_split;
@ -2624,6 +2628,7 @@ bdev_io_init(struct spdk_bdev_io *bdev_io,
bdev_io->num_retries = 0;
bdev_io->internal.get_buf_cb = NULL;
bdev_io->internal.get_aux_buf_cb = NULL;
bdev_io->internal.ext_opts = NULL;
}
static bool
@ -3834,7 +3839,8 @@ spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
static int
bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks,
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg)
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
struct spdk_bdev_ext_io_opts *opts)
{
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
struct spdk_bdev_io *bdev_io;
@ -3858,6 +3864,7 @@ bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *c
bdev_io->u.bdev.num_blocks = num_blocks;
bdev_io->u.bdev.offset_blocks = offset_blocks;
bdev_io_init(bdev_io, bdev, cb_arg, cb);
bdev_io->internal.ext_opts = opts;
bdev_io_submit(bdev_io);
return 0;
@ -3869,7 +3876,7 @@ int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
num_blocks, cb, cb_arg);
num_blocks, cb, cb_arg, NULL);
}
int
@ -3887,7 +3894,32 @@ spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann
}
return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
num_blocks, cb, cb_arg);
num_blocks, cb, cb_arg, NULL);
}
int
spdk_bdev_readv_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			   struct iovec *iov, int iovcnt,
			   uint64_t offset_blocks, uint64_t num_blocks,
			   spdk_bdev_io_completion_cb cb, void *cb_arg,
			   struct spdk_bdev_ext_io_opts *opts)
{
	/* The separate metadata buffer is optional and travels inside the
	 * extended options; without options there is no metadata buffer. */
	void *md_buf = (opts != NULL) ? opts->metadata : NULL;

	if (md_buf != NULL) {
		/* A metadata buffer is only accepted when the bdev reports
		 * separate metadata and the buffer passes the md/iov check. */
		if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
			return -EINVAL;
		}

		if (!_bdev_io_check_md_buf(iov, md_buf)) {
			return -EINVAL;
		}
	}

	/* Hand over to the common read path, forwarding the caller's opts. */
	return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
					 num_blocks, cb, cb_arg, opts);
}
static int
@ -3977,7 +4009,8 @@ static int
bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, void *md_buf,
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg)
spdk_bdev_io_completion_cb cb, void *cb_arg,
struct spdk_bdev_ext_io_opts *opts)
{
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
struct spdk_bdev_io *bdev_io;
@ -4005,6 +4038,7 @@ bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *
bdev_io->u.bdev.num_blocks = num_blocks;
bdev_io->u.bdev.offset_blocks = offset_blocks;
bdev_io_init(bdev_io, bdev, cb_arg, cb);
bdev_io->internal.ext_opts = opts;
bdev_io_submit(bdev_io);
return 0;
@ -4033,7 +4067,7 @@ spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
num_blocks, cb, cb_arg);
num_blocks, cb, cb_arg, NULL);
}
int
@ -4051,7 +4085,32 @@ spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chan
}
return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
num_blocks, cb, cb_arg);
num_blocks, cb, cb_arg, NULL);
}
int
spdk_bdev_writev_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			    struct iovec *iov, int iovcnt,
			    uint64_t offset_blocks, uint64_t num_blocks,
			    spdk_bdev_io_completion_cb cb, void *cb_arg,
			    struct spdk_bdev_ext_io_opts *opts)
{
	/* The separate metadata buffer is optional and travels inside the
	 * extended options; without options there is no metadata buffer. */
	void *md_buf = (opts != NULL) ? opts->metadata : NULL;

	if (md_buf != NULL) {
		/* A metadata buffer is only accepted when the bdev reports
		 * separate metadata and the buffer passes the md/iov check. */
		if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
			return -EINVAL;
		}

		if (!_bdev_io_check_md_buf(iov, md_buf)) {
			return -EINVAL;
		}
	}

	/* Hand over to the common write path, forwarding the caller's opts. */
	return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
					  num_blocks, cb, cb_arg, opts);
}
static void

View File

@ -95,6 +95,8 @@
spdk_bdev_histogram_get;
spdk_bdev_get_media_events;
spdk_bdev_get_memory_domains;
spdk_bdev_readv_blocks_ext;
spdk_bdev_writev_blocks_ext;
# Public functions in bdev_module.h
spdk_bdev_register;

View File

@ -85,6 +85,7 @@ struct ut_expected_io {
int iovcnt;
struct iovec iov[BDEV_IO_NUM_CHILD_IOV];
void *md_buf;
struct spdk_bdev_ext_io_opts *ext_io_opts;
TAILQ_ENTRY(ut_expected_io) link;
};
@ -261,6 +262,10 @@ stub_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
CU_ASSERT(iov->iov_base == expected_iov->iov_base);
}
if (expected_io->ext_io_opts) {
CU_ASSERT(expected_io->ext_io_opts == bdev_io->internal.ext_opts)
}
free(expected_io);
}
@ -4818,6 +4823,69 @@ bdev_get_memory_domains(void)
CU_ASSERT(rc == 0);
}
static void
bdev_writev_readv_ext(void)
{
struct spdk_bdev *bdev;
struct spdk_bdev_desc *desc = NULL;
struct spdk_io_channel *io_ch;
struct iovec iov = { .iov_base = (void *)0xbaaddead, .iov_len = 0x1000 };
struct ut_expected_io *expected_io;
struct spdk_bdev_ext_io_opts ext_io_opts = {
.metadata = (void *)0xFF000000
};
int rc;
spdk_bdev_initialize(bdev_init_cb, NULL);
bdev = allocate_bdev("bdev0");
bdev->md_interleave = false;
bdev->md_len = 8;
rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
CU_ASSERT(rc == 0);
SPDK_CU_ASSERT_FATAL(desc != NULL);
CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
io_ch = spdk_bdev_get_io_channel(desc);
CU_ASSERT(io_ch != NULL);
g_io_done = false;
expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 32, 14, 1);
expected_io->md_buf = ext_io_opts.metadata;
expected_io->ext_io_opts = &ext_io_opts;
ut_expected_io_set_iov(expected_io, 0, iov.iov_base, iov.iov_len);
TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
CU_ASSERT(rc == 0);
CU_ASSERT(g_io_done == false);
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
stub_complete_io(1);
CU_ASSERT(g_io_done == true);
g_io_done = false;
expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 14, 1);
expected_io->md_buf = ext_io_opts.metadata;
expected_io->ext_io_opts = &ext_io_opts;
ut_expected_io_set_iov(expected_io, 0, iov.iov_base, iov.iov_len);
TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
CU_ASSERT(rc == 0);
CU_ASSERT(g_io_done == false);
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
stub_complete_io(1);
CU_ASSERT(g_io_done == true);
spdk_put_io_channel(io_ch);
spdk_bdev_close(desc);
free_bdev(bdev);
spdk_bdev_finish(bdev_fini_cb, NULL);
poll_threads();
}
int
main(int argc, char **argv)
{
@ -4864,6 +4932,7 @@ main(int argc, char **argv)
CU_ADD_TEST(suite, bdev_set_options_test);
CU_ADD_TEST(suite, bdev_multi_allocation);
CU_ADD_TEST(suite, bdev_get_memory_domains);
CU_ADD_TEST(suite, bdev_writev_readv_ext);
allocate_cores(1);
allocate_threads(1);