bdev: Add extended versions of readv/writev
New functions accept extendable structure of IO options Change-Id: If6864df151a3c0ad722785cb26d1f5d4309cd733 Signed-off-by: Alexey Marchuk <alexeymar@mellanox.com> Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/6269 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com> Reviewed-by: Ziye Yang <ziye.yang@intel.com> Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com> Community-CI: Mellanox Build Bot
This commit is contained in:
parent
c3a5848966
commit
ac6f2bdd8d
@ -6,6 +6,11 @@
|
||||
|
||||
New API `spdk_bdev_get_memory_domains` has been added; it allows getting the SPDK memory domains used by a bdev.
|
||||
|
||||
New API functions `spdk_bdev_readv_blocks_ext` and `spdk_bdev_writev_blocks_ext` have been added.
|
||||
These functions accept `spdk_bdev_ext_io_opts` structure with extended IO request
|
||||
options, e.g. DMA memory domain which describes data that may belong to another memory domain and
|
||||
can't be accessed directly.
|
||||
|
||||
### dma
|
||||
|
||||
A new library, lib/dma, has been added. This library provides the necessary infrastructure for
|
||||
|
@ -81,9 +81,6 @@ struct spdk_bdev_media_event {
|
||||
*/
|
||||
struct spdk_bdev;
|
||||
|
||||
/** Forward declaration of spdk memory domain */
|
||||
struct spdk_memory_domain;
|
||||
|
||||
/**
|
||||
* Block device remove callback.
|
||||
*
|
||||
@ -203,6 +200,24 @@ struct spdk_bdev_opts {
|
||||
uint32_t large_buf_pool_size;
|
||||
};
|
||||
|
||||
/**
|
||||
* Structure with optional IO request parameters
|
||||
* The content of this structure must be valid until the IO request is completed
|
||||
*/
|
||||
struct spdk_bdev_ext_io_opts {
|
||||
/** Size of this structure in bytes. */
|
||||
size_t size;
|
||||
/** Memory domain which describes payload in this IO request. bdev must support DMA device type that
|
||||
* can access this memory domain, refer to \ref spdk_bdev_get_memory_domains and \ref spdk_memory_domain_get_dma_device_type
|
||||
* If set, that means that data buffers can't be accessed directly and the memory domain must
|
||||
* be used to fetch data to local buffers or to translate data to another memory domain. */
|
||||
struct spdk_memory_domain *memory_domain;
|
||||
/** Context to be passed to memory domain operations. */
|
||||
void *memory_domain_ctx;
|
||||
/** Metadata buffer, optional. The extended read/write functions use it as the separate
 * metadata buffer and fail with -EINVAL if it is set but separate metadata is not supported. */
|
||||
void *metadata;
|
||||
};
|
||||
|
||||
/**
|
||||
* Get the options for the bdev module.
|
||||
*
|
||||
@ -900,6 +915,40 @@ int spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_c
|
||||
uint64_t offset_blocks, uint64_t num_blocks,
|
||||
spdk_bdev_io_completion_cb cb, void *cb_arg);
|
||||
|
||||
/**
|
||||
* Submit a read request to the bdev on the given channel. This differs from
|
||||
* spdk_bdev_read by allowing the data buffer to be described in a scatter
|
||||
* gather list. Some physical devices place memory alignment requirements on
|
||||
* data or metadata and may not be able to directly transfer into the buffers
|
||||
* provided. In this case, the request may fail. This function uses separate
|
||||
* buffer for metadata transfer (valid only if bdev supports this mode).
|
||||
*
|
||||
* \ingroup bdev_io_submit_functions
|
||||
*
|
||||
* \param desc Block device descriptor.
|
||||
* \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
|
||||
* \param iov A scatter gather list of buffers to be read into.
|
||||
* \param iovcnt The number of elements in iov.
|
||||
* \param offset_blocks The offset, in blocks, from the start of the block device.
|
||||
* \param num_blocks The number of blocks to read.
|
||||
* \param cb Called when the request is complete.
|
||||
* \param cb_arg Argument passed to cb.
|
||||
* \param opts Optional structure with extended IO request options. If set, this structure must be
|
||||
* valid until the IO is completed. The optional metadata buffer is passed in opts->metadata.
|
||||
*
|
||||
* \return 0 on success. On success, the callback will always
|
||||
* be called (even if the request ultimately failed). Return
|
||||
* negated errno on failure, in which case the callback will not be called.
|
||||
* * -EINVAL - offset_blocks and/or num_blocks are out of range or separate
|
||||
* metadata is not supported or opts_size is incorrect
|
||||
* * -ENOMEM - spdk_bdev_io buffer cannot be allocated
|
||||
*/
|
||||
int spdk_bdev_readv_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
||||
struct iovec *iov, int iovcnt, uint64_t offset_blocks,
|
||||
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
|
||||
struct spdk_bdev_ext_io_opts *opts);
|
||||
|
||||
/**
|
||||
* Submit a write request to the bdev on the given channel.
|
||||
*
|
||||
@ -1069,6 +1118,41 @@ int spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_
|
||||
uint64_t offset_blocks, uint64_t num_blocks,
|
||||
spdk_bdev_io_completion_cb cb, void *cb_arg);
|
||||
|
||||
/**
|
||||
* Submit a write request to the bdev on the given channel. This differs from
|
||||
* spdk_bdev_write by allowing the data buffer to be described in a scatter
|
||||
* gather list. Some physical devices place memory alignment requirements on
|
||||
* data or metadata and may not be able to directly transfer out of the buffers
|
||||
* provided. In this case, the request may fail. This function uses separate
|
||||
* buffer for metadata transfer (valid only if bdev supports this mode).
|
||||
*
|
||||
* \ingroup bdev_io_submit_functions
|
||||
*
|
||||
* \param desc Block device descriptor.
|
||||
* \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
|
||||
* \param iov A scatter gather list of buffers to be written from.
|
||||
* \param iovcnt The number of elements in iov.
|
||||
* \param offset_blocks The offset, in blocks, from the start of the block device.
|
||||
* \param num_blocks The number of blocks to write.
|
||||
* \param cb Called when the request is complete.
|
||||
* \param cb_arg Argument passed to cb.
|
||||
* \param opts Optional structure with extended IO request options. If set, this structure must be
|
||||
* valid until the IO is completed. The optional metadata buffer is passed in opts->metadata.
|
||||
*
|
||||
* \return 0 on success. On success, the callback will always
|
||||
* be called (even if the request ultimately failed). Return
|
||||
* negated errno on failure, in which case the callback will not be called.
|
||||
* * -EINVAL - offset_blocks and/or num_blocks are out of range or separate
|
||||
* metadata is not supported or opts_size is incorrect
|
||||
* * -ENOMEM - spdk_bdev_io buffer cannot be allocated
|
||||
* * -EBADF - desc not open for writing
|
||||
*/
|
||||
int spdk_bdev_writev_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
||||
struct iovec *iov, int iovcnt, uint64_t offset_blocks,
|
||||
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
|
||||
struct spdk_bdev_ext_io_opts *opts);
|
||||
|
||||
/**
|
||||
* Submit a compare request to the bdev on the given channel.
|
||||
*
|
||||
|
@ -750,6 +750,9 @@ struct spdk_bdev_io {
|
||||
|
||||
/** Enables queuing parent I/O when no bdev_ios available for split children. */
|
||||
struct spdk_bdev_io_wait_entry waitq_entry;
|
||||
|
||||
/** Pointer to a structure passed by the user in ext API */
|
||||
struct spdk_bdev_ext_io_opts *ext_opts;
|
||||
} internal;
|
||||
|
||||
/**
|
||||
|
@ -364,12 +364,14 @@ static void bdev_enable_qos_done(struct spdk_io_channel_iter *i, int status);
|
||||
static int
|
||||
bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
||||
struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks,
|
||||
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg);
|
||||
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
|
||||
struct spdk_bdev_ext_io_opts *opts);
|
||||
static int
|
||||
bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
||||
struct iovec *iov, int iovcnt, void *md_buf,
|
||||
uint64_t offset_blocks, uint64_t num_blocks,
|
||||
spdk_bdev_io_completion_cb cb, void *cb_arg);
|
||||
spdk_bdev_io_completion_cb cb, void *cb_arg,
|
||||
struct spdk_bdev_ext_io_opts *opts);
|
||||
|
||||
static int
|
||||
bdev_lock_lba_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *_ch,
|
||||
@ -2119,14 +2121,16 @@ bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt
|
||||
spdk_io_channel_from_ctx(bdev_io->internal.ch),
|
||||
iov, iovcnt, md_buf, current_offset,
|
||||
num_blocks,
|
||||
bdev_io_split_done, bdev_io);
|
||||
bdev_io_split_done, bdev_io,
|
||||
bdev_io->internal.ext_opts);
|
||||
break;
|
||||
case SPDK_BDEV_IO_TYPE_WRITE:
|
||||
rc = bdev_writev_blocks_with_md(bdev_io->internal.desc,
|
||||
spdk_io_channel_from_ctx(bdev_io->internal.ch),
|
||||
iov, iovcnt, md_buf, current_offset,
|
||||
num_blocks,
|
||||
bdev_io_split_done, bdev_io);
|
||||
bdev_io_split_done, bdev_io,
|
||||
bdev_io->internal.ext_opts);
|
||||
break;
|
||||
case SPDK_BDEV_IO_TYPE_UNMAP:
|
||||
io_wait_fn = _bdev_unmap_split;
|
||||
@ -2624,6 +2628,7 @@ bdev_io_init(struct spdk_bdev_io *bdev_io,
|
||||
bdev_io->num_retries = 0;
|
||||
bdev_io->internal.get_buf_cb = NULL;
|
||||
bdev_io->internal.get_aux_buf_cb = NULL;
|
||||
bdev_io->internal.ext_opts = NULL;
|
||||
}
|
||||
|
||||
static bool
|
||||
@ -3834,7 +3839,8 @@ spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
||||
static int
|
||||
bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
||||
struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks,
|
||||
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg)
|
||||
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
|
||||
struct spdk_bdev_ext_io_opts *opts)
|
||||
{
|
||||
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
|
||||
struct spdk_bdev_io *bdev_io;
|
||||
@ -3858,6 +3864,7 @@ bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *c
|
||||
bdev_io->u.bdev.num_blocks = num_blocks;
|
||||
bdev_io->u.bdev.offset_blocks = offset_blocks;
|
||||
bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
||||
bdev_io->internal.ext_opts = opts;
|
||||
|
||||
bdev_io_submit(bdev_io);
|
||||
return 0;
|
||||
@ -3869,7 +3876,7 @@ int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *
|
||||
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
||||
{
|
||||
return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
|
||||
num_blocks, cb, cb_arg);
|
||||
num_blocks, cb, cb_arg, NULL);
|
||||
}
|
||||
|
||||
int
|
||||
@ -3887,7 +3894,32 @@ spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann
|
||||
}
|
||||
|
||||
return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
|
||||
num_blocks, cb, cb_arg);
|
||||
num_blocks, cb, cb_arg, NULL);
|
||||
}
|
||||
|
||||
/*
 * Extended vectored read: forwards the request to bdev_readv_blocks_with_md(),
 * taking the metadata buffer from opts->metadata when opts is non-NULL and
 * passing opts along so it can be recorded on the bdev_io.
 *
 * Returns -EINVAL when a metadata buffer is supplied but either
 * spdk_bdev_is_md_separate() or _bdev_io_check_md_buf() rejects it; otherwise
 * returns whatever bdev_readv_blocks_with_md() returns.
 *
 * NOTE(review): opts->size is not inspected in this function, although the
 * public header mentions an incorrect options size as an -EINVAL cause.
 */
int
|
||||
spdk_bdev_readv_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
||||
struct iovec *iov, int iovcnt,
|
||||
uint64_t offset_blocks, uint64_t num_blocks,
|
||||
spdk_bdev_io_completion_cb cb, void *cb_arg,
|
||||
struct spdk_bdev_ext_io_opts *opts)
|
||||
{
|
||||
void *md = NULL;
|
||||
|
||||
/* opts is optional; without it the request carries no metadata buffer. */
if (opts) {
|
||||
md = opts->metadata;
|
||||
}
|
||||
|
||||
/* A metadata buffer is only accepted when the bdev reports separate metadata. */
if (md && !spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Helper is defined outside this view; presumably validates md against iov - confirm. */
if (md && !_bdev_io_check_md_buf(iov, md)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md, offset_blocks,
|
||||
num_blocks, cb, cb_arg, opts);
|
||||
}
|
||||
|
||||
static int
|
||||
@ -3977,7 +4009,8 @@ static int
|
||||
bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
||||
struct iovec *iov, int iovcnt, void *md_buf,
|
||||
uint64_t offset_blocks, uint64_t num_blocks,
|
||||
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
||||
spdk_bdev_io_completion_cb cb, void *cb_arg,
|
||||
struct spdk_bdev_ext_io_opts *opts)
|
||||
{
|
||||
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
|
||||
struct spdk_bdev_io *bdev_io;
|
||||
@ -4005,6 +4038,7 @@ bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *
|
||||
bdev_io->u.bdev.num_blocks = num_blocks;
|
||||
bdev_io->u.bdev.offset_blocks = offset_blocks;
|
||||
bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
||||
bdev_io->internal.ext_opts = opts;
|
||||
|
||||
bdev_io_submit(bdev_io);
|
||||
return 0;
|
||||
@ -4033,7 +4067,7 @@ spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
||||
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
||||
{
|
||||
return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
|
||||
num_blocks, cb, cb_arg);
|
||||
num_blocks, cb, cb_arg, NULL);
|
||||
}
|
||||
|
||||
int
|
||||
@ -4051,7 +4085,32 @@ spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chan
|
||||
}
|
||||
|
||||
return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
|
||||
num_blocks, cb, cb_arg);
|
||||
num_blocks, cb, cb_arg, NULL);
|
||||
}
|
||||
|
||||
/*
 * Extended vectored write: forwards the request to bdev_writev_blocks_with_md(),
 * taking the metadata buffer from opts->metadata when opts is non-NULL and
 * passing opts along so it can be recorded on the bdev_io.
 *
 * Returns -EINVAL when a metadata buffer is supplied but either
 * spdk_bdev_is_md_separate() or _bdev_io_check_md_buf() rejects it; otherwise
 * returns whatever bdev_writev_blocks_with_md() returns.
 *
 * NOTE(review): opts->size is not inspected in this function, although the
 * public header mentions an incorrect options size as an -EINVAL cause.
 */
int
|
||||
spdk_bdev_writev_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
||||
struct iovec *iov, int iovcnt,
|
||||
uint64_t offset_blocks, uint64_t num_blocks,
|
||||
spdk_bdev_io_completion_cb cb, void *cb_arg,
|
||||
struct spdk_bdev_ext_io_opts *opts)
|
||||
{
|
||||
void *md = NULL;
|
||||
|
||||
/* opts is optional; without it the request carries no metadata buffer. */
if (opts) {
|
||||
md = opts->metadata;
|
||||
}
|
||||
|
||||
/* A metadata buffer is only accepted when the bdev reports separate metadata. */
if (md && !spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Helper is defined outside this view; presumably validates md against iov - confirm. */
if (md && !_bdev_io_check_md_buf(iov, md)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md, offset_blocks,
|
||||
num_blocks, cb, cb_arg, opts);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -95,6 +95,8 @@
|
||||
spdk_bdev_histogram_get;
|
||||
spdk_bdev_get_media_events;
|
||||
spdk_bdev_get_memory_domains;
|
||||
spdk_bdev_readv_blocks_ext;
|
||||
spdk_bdev_writev_blocks_ext;
|
||||
|
||||
# Public functions in bdev_module.h
|
||||
spdk_bdev_register;
|
||||
|
@ -85,6 +85,7 @@ struct ut_expected_io {
|
||||
int iovcnt;
|
||||
struct iovec iov[BDEV_IO_NUM_CHILD_IOV];
|
||||
void *md_buf;
|
||||
struct spdk_bdev_ext_io_opts *ext_io_opts;
|
||||
TAILQ_ENTRY(ut_expected_io) link;
|
||||
};
|
||||
|
||||
@ -261,6 +262,10 @@ stub_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
|
||||
CU_ASSERT(iov->iov_base == expected_iov->iov_base);
|
||||
}
|
||||
|
||||
if (expected_io->ext_io_opts) {
|
||||
CU_ASSERT(expected_io->ext_io_opts == bdev_io->internal.ext_opts)
|
||||
}
|
||||
|
||||
free(expected_io);
|
||||
}
|
||||
|
||||
@ -4818,6 +4823,69 @@ bdev_get_memory_domains(void)
|
||||
CU_ASSERT(rc == 0);
|
||||
}
|
||||
|
||||
/*
 * Unit test for spdk_bdev_readv_blocks_ext() and spdk_bdev_writev_blocks_ext().
 * Submits one read and one write of 14 blocks at offset 32 with an ext opts
 * structure that carries only a metadata pointer, and expects the stub bdev to
 * receive that metadata pointer and the same ext opts pointer.
 *
 * NOTE(review): ext_io_opts.size is left zero-initialized here; the calls are
 * still expected to return 0.
 */
static void
|
||||
bdev_writev_readv_ext(void)
|
||||
{
|
||||
struct spdk_bdev *bdev;
|
||||
struct spdk_bdev_desc *desc = NULL;
|
||||
struct spdk_io_channel *io_ch;
|
||||
struct iovec iov = { .iov_base = (void *)0xbaaddead, .iov_len = 0x1000 };
|
||||
struct ut_expected_io *expected_io;
|
||||
struct spdk_bdev_ext_io_opts ext_io_opts = {
|
||||
.metadata = (void *)0xFF000000
|
||||
};
|
||||
int rc;
|
||||
|
||||
spdk_bdev_initialize(bdev_init_cb, NULL);
|
||||
|
||||
bdev = allocate_bdev("bdev0");
|
||||
/* Non-interleaved metadata with non-zero length, so that a metadata buffer can be passed. */
bdev->md_interleave = false;
|
||||
bdev->md_len = 8;
|
||||
|
||||
rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
|
||||
CU_ASSERT(rc == 0);
|
||||
SPDK_CU_ASSERT_FATAL(desc != NULL);
|
||||
CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
|
||||
io_ch = spdk_bdev_get_io_channel(desc);
|
||||
CU_ASSERT(io_ch != NULL);
|
||||
|
||||
/* Read path: queue the expected I/O, submit, then complete it through the stub. */
g_io_done = false;
|
||||
expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 32, 14, 1);
|
||||
expected_io->md_buf = ext_io_opts.metadata;
|
||||
expected_io->ext_io_opts = &ext_io_opts;
|
||||
ut_expected_io_set_iov(expected_io, 0, iov.iov_base, iov.iov_len);
|
||||
TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
|
||||
|
||||
rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
|
||||
|
||||
CU_ASSERT(rc == 0);
|
||||
CU_ASSERT(g_io_done == false);
|
||||
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
|
||||
stub_complete_io(1);
|
||||
CU_ASSERT(g_io_done == true);
|
||||
|
||||
/* Write path: same sequence with a write request. */
g_io_done = false;
|
||||
expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 14, 1);
|
||||
expected_io->md_buf = ext_io_opts.metadata;
|
||||
expected_io->ext_io_opts = &ext_io_opts;
|
||||
ut_expected_io_set_iov(expected_io, 0, iov.iov_base, iov.iov_len);
|
||||
TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
|
||||
|
||||
rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
|
||||
|
||||
CU_ASSERT(rc == 0);
|
||||
CU_ASSERT(g_io_done == false);
|
||||
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
|
||||
stub_complete_io(1);
|
||||
CU_ASSERT(g_io_done == true);
|
||||
|
||||
/* Teardown of the channel, descriptor, bdev and bdev layer. */
spdk_put_io_channel(io_ch);
|
||||
spdk_bdev_close(desc);
|
||||
free_bdev(bdev);
|
||||
spdk_bdev_finish(bdev_fini_cb, NULL);
|
||||
poll_threads();
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
@ -4864,6 +4932,7 @@ main(int argc, char **argv)
|
||||
CU_ADD_TEST(suite, bdev_set_options_test);
|
||||
CU_ADD_TEST(suite, bdev_multi_allocation);
|
||||
CU_ADD_TEST(suite, bdev_get_memory_domains);
|
||||
CU_ADD_TEST(suite, bdev_writev_readv_ext);
|
||||
|
||||
allocate_cores(1);
|
||||
allocate_threads(1);
|
||||
|
Loading…
Reference in New Issue
Block a user