lib/bdev: separate metadata buffer IO functions

This patch adds *_with_md family of functions allowing for IO with
metadata being transferred in a separate buffer.

Change-Id: I842d5a00a532cf5d0b0f0738535ea46903674140
Signed-off-by: Konrad Sztyber <konrad.sztyber@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/451465
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
This commit is contained in:
Konrad Sztyber 2019-04-16 10:12:09 +02:00 committed by Jim Harris
parent 617a004fb9
commit 58663640d2
3 changed files with 451 additions and 68 deletions

View File

@ -436,6 +436,18 @@ uint32_t spdk_bdev_get_md_size(const struct spdk_bdev *bdev);
*/
bool spdk_bdev_is_md_interleaved(const struct spdk_bdev *bdev);
/**
* Query whether metadata is interleaved with block data or separated
* from block data.
*
* \param bdev Block device to query.
* \return true if metadata is separated from block data, false
* otherwise.
*
* Note this function is valid only if there is metadata.
*/
bool spdk_bdev_is_md_separate(const struct spdk_bdev *bdev);
/**
* Get block device data block size.
*
@ -620,6 +632,33 @@ int spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *c
void *buf, uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg);
/**
* Submit a read request to the bdev on the given channel. This function uses
* separate buffer for metadata transfer (valid only if bdev supports this
* mode).
*
* \ingroup bdev_io_submit_functions
*
* \param desc Block device descriptor.
* \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
* \param buf Data buffer to read into.
* \param md Metadata buffer.
* \param offset_blocks The offset, in blocks, from the start of the block device.
* \param num_blocks The number of blocks to read.
* \param cb Called when the request is complete.
* \param cb_arg Argument passed to cb.
*
* \return 0 on success. On success, the callback will always
* be called (even if the request ultimately failed). Return
* negated errno on failure, in which case the callback will not be called.
* * -EINVAL - offset_blocks and/or num_blocks are out of range or separate
* metadata is not supported
* * -ENOMEM - spdk_bdev_io buffer cannot be allocated
*/
int spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
void *buf, void *md, int64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg);
/**
* Submit a read request to the bdev on the given channel. This differs from
* spdk_bdev_read by allowing the data buffer to be described in a scatter
@ -678,6 +717,38 @@ int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg);
/**
* Submit a read request to the bdev on the given channel. This differs from
* spdk_bdev_read by allowing the data buffer to be described in a scatter
* gather list. Some physical devices place memory alignment requirements on
* data or metadata and may not be able to directly transfer into the buffers
* provided. In this case, the request may fail. This function uses separate
* buffer for metadata transfer (valid only if bdev supports this mode).
*
* \ingroup bdev_io_submit_functions
*
* \param desc Block device descriptor.
* \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
* \param iov A scatter gather list of buffers to be read into.
* \param iovcnt The number of elements in iov.
* \param md Metadata buffer.
* \param offset_blocks The offset, in blocks, from the start of the block device.
* \param num_blocks The number of blocks to read.
* \param cb Called when the request is complete.
* \param cb_arg Argument passed to cb.
*
* \return 0 on success. On success, the callback will always
* be called (even if the request ultimately failed). Return
* negated errno on failure, in which case the callback will not be called.
* * -EINVAL - offset_blocks and/or num_blocks are out of range or separate
* metadata is not supported
* * -ENOMEM - spdk_bdev_io buffer cannot be allocated
*/
int spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, void *md,
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg);
/**
* Submit a write request to the bdev on the given channel.
*
@ -726,6 +797,34 @@ int spdk_bdev_write_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *
void *buf, uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg);
/**
* Submit a write request to the bdev on the given channel. This function uses
* separate buffer for metadata transfer (valid only if bdev supports this
* mode).
*
* \ingroup bdev_io_submit_functions
*
* \param desc Block device descriptor.
* \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
* \param buf Data buffer to written from.
* \param md Metadata buffer.
* \param offset_blocks The offset, in blocks, from the start of the block device.
* \param num_blocks The number of blocks to write. buf must be greater than or equal to this size.
* \param cb Called when the request is complete.
* \param cb_arg Argument passed to cb.
*
* \return 0 on success. On success, the callback will always
* be called (even if the request ultimately failed). Return
* negated errno on failure, in which case the callback will not be called.
* * -EINVAL - offset_blocks and/or num_blocks are out of range or separate
* metadata is not supported
* * -ENOMEM - spdk_bdev_io buffer cannot be allocated
* * -EBADF - desc not open for writing
*/
int spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
void *buf, void *md, uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg);
/**
* Submit a write request to the bdev on the given channel. This differs from
* spdk_bdev_write by allowing the data buffer to be described in a scatter
@ -786,6 +885,39 @@ int spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg);
/**
* Submit a write request to the bdev on the given channel. This differs from
* spdk_bdev_write by allowing the data buffer to be described in a scatter
* gather list. Some physical devices place memory alignment requirements on
* data or metadata and may not be able to directly transfer out of the buffers
* provided. In this case, the request may fail. This function uses separate
* buffer for metadata transfer (valid only if bdev supports this mode).
*
* \ingroup bdev_io_submit_functions
*
* \param desc Block device descriptor.
* \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
* \param iov A scatter gather list of buffers to be written from.
* \param iovcnt The number of elements in iov.
* \param md Metadata buffer.
* \param offset_blocks The offset, in blocks, from the start of the block device.
* \param num_blocks The number of blocks to write.
* \param cb Called when the request is complete.
* \param cb_arg Argument passed to cb.
*
* \return 0 on success. On success, the callback will always
* be called (even if the request ultimately failed). Return
* negated errno on failure, in which case the callback will not be called.
* * -EINVAL - offset_blocks and/or num_blocks are out of range or separate
* metadata is not supported
* * -ENOMEM - spdk_bdev_io buffer cannot be allocated
* * -EBADF - desc not open for writing
*/
int spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, void *md,
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg);
/**
* Submit a request to acquire a data buffer that represents the given
* range of blocks. The data buffer is placed in the spdk_bdev_io structure
@ -1197,6 +1329,16 @@ void spdk_bdev_io_get_scsi_status(const struct spdk_bdev_io *bdev_io,
*/
void spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *iovcntp);
/**
* Get metadata buffer. Only makes sense if the IO uses separate buffer for
* metadata transfer.
*
* \param bdev_io I/O to retrieve the buffer from.
* \return Pointer to metadata buffer, NULL if the IO doesn't use separate
* buffer for metadata transfer.
*/
void *spdk_bdev_io_get_md_buf(struct spdk_bdev_io *bdev_io);
typedef void (*spdk_bdev_histogram_status_cb)(void *cb_arg, int status);
typedef void (*spdk_bdev_histogram_data_cb)(void *cb_arg, int status,
struct spdk_histogram_data *histogram);

View File

@ -436,6 +436,9 @@ struct spdk_bdev_io {
/** For SG buffer cases, number of iovecs in iovec array. */
int iovcnt;
/* Metadata buffer */
void *md_buf;
/** Total size of data to be transferred. */
uint64_t num_blocks;
@ -556,6 +559,7 @@ struct spdk_bdev_io {
struct iovec bounce_iov;
struct iovec *orig_iovs;
int orig_iovcnt;
void *orig_md_buf;
/** Callback for when buf is allocated */
spdk_bdev_io_get_buf_cb get_buf_cb;
@ -756,6 +760,15 @@ void spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb
*/
void spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len);
/**
* Set the given buffer as metadata buffer described by this bdev_io.
*
* \param bdev_io I/O to set the buffer on.
* \param md_buf The buffer to set as the active metadata buffer.
* \param len The length of the metadata buffer.
*/
void spdk_bdev_io_set_md_buf(struct spdk_bdev_io *bdev_io, void *md_buf, size_t len);
/**
* Complete a bdev_io
*

View File

@ -299,6 +299,16 @@ static void _spdk_bdev_write_zero_buffer_next(void *_bdev_io);
static void _spdk_bdev_enable_qos_msg(struct spdk_io_channel_iter *i);
static void _spdk_bdev_enable_qos_done(struct spdk_io_channel_iter *i, int status);
static int
_spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks,
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg);
static int
_spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, void *md_buf,
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg);
void
spdk_bdev_get_opts(struct spdk_bdev_opts *opts)
{
@ -438,8 +448,15 @@ spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len)
iovs[0].iov_len = len;
}
void
spdk_bdev_io_set_md_buf(struct spdk_bdev_io *bdev_io, void *md_buf, size_t len)
{
assert((len / spdk_bdev_get_md_size(bdev_io->bdev)) >= bdev_io->u.bdev.num_blocks);
bdev_io->u.bdev.md_buf = md_buf;
}
static bool
_is_buf_allocated(struct iovec *iovs)
_is_buf_allocated(const struct iovec *iovs)
{
if (iovs == NULL) {
return false;
@ -514,12 +531,25 @@ _bdev_io_set_bounce_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len)
}
}
static void
_bdev_io_set_bounce_md_buf(struct spdk_bdev_io *bdev_io, void *md_buf, size_t len)
{
/* save original md_buf */
bdev_io->internal.orig_md_buf = bdev_io->u.bdev.md_buf;
/* set bounce md_buf */
bdev_io->u.bdev.md_buf = md_buf;
if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
memcpy(md_buf, bdev_io->internal.orig_md_buf, len);
}
}
static void
_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, uint64_t len)
{
struct spdk_bdev *bdev = bdev_io->bdev;
bool buf_allocated;
uint64_t alignment;
uint64_t md_len, alignment;
void *aligned_buf;
alignment = spdk_bdev_get_buf_align(bdev);
@ -532,6 +562,19 @@ _bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, uint64_t len)
spdk_bdev_io_set_buf(bdev_io, aligned_buf, len);
}
if (spdk_bdev_is_md_separate(bdev)) {
aligned_buf = (char *)aligned_buf + len;
md_len = bdev_io->u.bdev.num_blocks * bdev->md_len;
assert(((uintptr_t)aligned_buf & (alignment - 1)) == 0);
if (bdev_io->u.bdev.md_buf != NULL) {
_bdev_io_set_bounce_md_buf(bdev_io, aligned_buf, md_len);
} else {
spdk_bdev_io_set_md_buf(bdev_io, aligned_buf, md_len);
}
}
bdev_io->internal.buf = buf;
bdev_io->internal.get_buf_cb(bdev_io->internal.ch->channel, bdev_io, true);
}
@ -539,21 +582,23 @@ _bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, uint64_t len)
static void
spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io)
{
struct spdk_bdev *bdev = bdev_io->bdev;
struct spdk_mempool *pool;
struct spdk_bdev_io *tmp;
bdev_io_stailq_t *stailq;
struct spdk_bdev_mgmt_channel *ch;
uint64_t buf_len, alignment;
uint64_t buf_len, md_len, alignment;
void *buf;
buf = bdev_io->internal.buf;
buf_len = bdev_io->internal.buf_len;
alignment = spdk_bdev_get_buf_align(bdev_io->bdev);
md_len = spdk_bdev_is_md_separate(bdev) ? bdev_io->u.bdev.num_blocks * bdev->md_len : 0;
alignment = spdk_bdev_get_buf_align(bdev);
ch = bdev_io->internal.ch->shared_resource->mgmt_ch;
bdev_io->internal.buf = NULL;
if (buf_len + alignment <= SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_SMALL_BUF_MAX_SIZE) +
if (buf_len + alignment + md_len <= SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_SMALL_BUF_MAX_SIZE) +
SPDK_BDEV_POOL_ALIGNMENT) {
pool = g_bdev_mgr.buf_small_pool;
stailq = &ch->need_buf_small;
@ -574,11 +619,18 @@ spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io)
static void
_bdev_io_unset_bounce_buf(struct spdk_bdev_io *bdev_io)
{
if (spdk_likely(bdev_io->internal.orig_iovcnt == 0)) {
assert(bdev_io->internal.orig_md_buf == NULL);
return;
}
/* if this is read path, copy data from bounce buffer to original buffer */
if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ &&
bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS) {
_copy_buf_to_iovs(bdev_io->internal.orig_iovs, bdev_io->internal.orig_iovcnt,
bdev_io->internal.bounce_iov.iov_base, bdev_io->internal.bounce_iov.iov_len);
_copy_buf_to_iovs(bdev_io->internal.orig_iovs,
bdev_io->internal.orig_iovcnt,
bdev_io->internal.bounce_iov.iov_base,
bdev_io->internal.bounce_iov.iov_len);
}
/* set orignal buffer for this io */
bdev_io->u.bdev.iovcnt = bdev_io->internal.orig_iovcnt;
@ -586,21 +638,38 @@ _bdev_io_unset_bounce_buf(struct spdk_bdev_io *bdev_io)
/* disable bouncing buffer for this io */
bdev_io->internal.orig_iovcnt = 0;
bdev_io->internal.orig_iovs = NULL;
/* return bounce buffer to the pool */
/* do the same for metadata buffer */
if (spdk_unlikely(bdev_io->internal.orig_md_buf != NULL)) {
assert(spdk_bdev_is_md_separate(bdev_io->bdev));
if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ &&
bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS) {
memcpy(bdev_io->internal.orig_md_buf, bdev_io->u.bdev.md_buf,
bdev_io->u.bdev.num_blocks * spdk_bdev_get_md_size(bdev_io->bdev));
}
bdev_io->u.bdev.md_buf = bdev_io->internal.orig_md_buf;
bdev_io->internal.orig_md_buf = NULL;
}
spdk_bdev_io_put_buf(bdev_io);
}
void
spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, uint64_t len)
{
struct spdk_bdev *bdev = bdev_io->bdev;
struct spdk_mempool *pool;
bdev_io_stailq_t *stailq;
struct spdk_bdev_mgmt_channel *mgmt_ch;
uint64_t alignment;
uint64_t alignment, md_len;
void *buf;
assert(cb != NULL);
alignment = spdk_bdev_get_buf_align(bdev_io->bdev);
alignment = spdk_bdev_get_buf_align(bdev);
md_len = spdk_bdev_is_md_separate(bdev) ? bdev_io->u.bdev.num_blocks * bdev->md_len : 0;
if (_is_buf_allocated(bdev_io->u.bdev.iovs) &&
_are_iovs_aligned(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, alignment)) {
@ -609,7 +678,7 @@ spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, u
return;
}
if (len + alignment > SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_LARGE_BUF_MAX_SIZE) +
if (len + alignment + md_len > SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_LARGE_BUF_MAX_SIZE) +
SPDK_BDEV_POOL_ALIGNMENT) {
SPDK_ERRLOG("Length + alignment %" PRIu64 " is larger than allowed\n",
len + alignment);
@ -622,7 +691,7 @@ spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, u
bdev_io->internal.buf_len = len;
bdev_io->internal.get_buf_cb = cb;
if (len + alignment <= SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_SMALL_BUF_MAX_SIZE) +
if (len + alignment + md_len <= SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_SMALL_BUF_MAX_SIZE) +
SPDK_BDEV_POOL_ALIGNMENT) {
pool = g_bdev_mgr.buf_small_pool;
stailq = &mgmt_ch->need_buf_small;
@ -1531,6 +1600,7 @@ _spdk_bdev_io_split_with_payload(void *_bdev_io)
struct iovec *parent_iov, *iov;
uint64_t parent_iov_offset, iov_len;
uint32_t parent_iovpos, parent_iovcnt, child_iovcnt, iovcnt;
void *md_buf = NULL;
int rc;
remaining = bdev_io->u.bdev.split_remaining_num_blocks;
@ -1554,6 +1624,13 @@ _spdk_bdev_io_split_with_payload(void *_bdev_io)
to_next_boundary_bytes = to_next_boundary * blocklen;
iov = &bdev_io->child_iov[child_iovcnt];
iovcnt = 0;
if (bdev_io->u.bdev.md_buf) {
assert((parent_iov_offset % blocklen) > 0);
md_buf = (char *)bdev_io->u.bdev.md_buf + (parent_iov_offset / blocklen) *
spdk_bdev_get_md_size(bdev_io->bdev);
}
while (to_next_boundary_bytes > 0 && parent_iovpos < parent_iovcnt &&
child_iovcnt < BDEV_IO_NUM_CHILD_IOV) {
parent_iov = &bdev_io->u.bdev.iovs[parent_iovpos];
@ -1593,15 +1670,17 @@ _spdk_bdev_io_split_with_payload(void *_bdev_io)
bdev_io->u.bdev.split_outstanding++;
if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
rc = spdk_bdev_readv_blocks(bdev_io->internal.desc,
spdk_io_channel_from_ctx(bdev_io->internal.ch),
iov, iovcnt, current_offset, to_next_boundary,
_spdk_bdev_io_split_done, bdev_io);
rc = _spdk_bdev_readv_blocks_with_md(bdev_io->internal.desc,
spdk_io_channel_from_ctx(bdev_io->internal.ch),
iov, iovcnt, md_buf, current_offset,
to_next_boundary,
_spdk_bdev_io_split_done, bdev_io);
} else {
rc = spdk_bdev_writev_blocks(bdev_io->internal.desc,
spdk_io_channel_from_ctx(bdev_io->internal.ch),
iov, iovcnt, current_offset, to_next_boundary,
_spdk_bdev_io_split_done, bdev_io);
rc = _spdk_bdev_writev_blocks_with_md(bdev_io->internal.desc,
spdk_io_channel_from_ctx(bdev_io->internal.ch),
iov, iovcnt, md_buf, current_offset,
to_next_boundary,
_spdk_bdev_io_split_done, bdev_io);
}
if (rc == 0) {
@ -1783,6 +1862,7 @@ spdk_bdev_io_init(struct spdk_bdev_io *bdev_io,
bdev_io->internal.io_submit_ch = NULL;
bdev_io->internal.orig_iovs = NULL;
bdev_io->internal.orig_iovcnt = 0;
bdev_io->internal.orig_md_buf = NULL;
}
static bool
@ -2380,6 +2460,12 @@ spdk_bdev_is_md_interleaved(const struct spdk_bdev *bdev)
return (bdev->md_len != 0) && bdev->md_interleave;
}
bool
spdk_bdev_is_md_separate(const struct spdk_bdev *bdev)
{
return (bdev->md_len != 0) && !bdev->md_interleave;
}
uint32_t
spdk_bdev_get_data_block_size(const struct spdk_bdev *bdev)
{
@ -2567,24 +2653,16 @@ spdk_bdev_io_valid_blocks(struct spdk_bdev *bdev, uint64_t offset_blocks, uint64
return true;
}
int
spdk_bdev_read(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
void *buf, uint64_t offset, uint64_t nbytes,
spdk_bdev_io_completion_cb cb, void *cb_arg)
static bool
_bdev_io_check_md_buf(const struct iovec *iovs, const void *md_buf)
{
uint64_t offset_blocks, num_blocks;
if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
return -EINVAL;
}
return spdk_bdev_read_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg);
return _is_buf_allocated(iovs) == (md_buf != NULL);
}
int
spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
void *buf, uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg)
static int
_spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, void *buf,
void *md_buf, int64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
struct spdk_bdev *bdev = desc->bdev;
struct spdk_bdev_io *bdev_io;
@ -2606,6 +2684,7 @@ spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
bdev_io->u.bdev.iovs[0].iov_base = buf;
bdev_io->u.bdev.iovs[0].iov_len = num_blocks * bdev->blocklen;
bdev_io->u.bdev.iovcnt = 1;
bdev_io->u.bdev.md_buf = md_buf;
bdev_io->u.bdev.num_blocks = num_blocks;
bdev_io->u.bdev.offset_blocks = offset_blocks;
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
@ -2614,6 +2693,50 @@ spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
return 0;
}
int
spdk_bdev_read(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
void *buf, uint64_t offset, uint64_t nbytes,
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
uint64_t offset_blocks, num_blocks;
if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
return -EINVAL;
}
return spdk_bdev_read_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg);
}
int
spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
void *buf, uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
return _spdk_bdev_read_blocks_with_md(desc, ch, buf, NULL, offset_blocks, num_blocks,
cb, cb_arg);
}
int
spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
void *buf, void *md_buf, int64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
struct iovec iov = {
.iov_base = buf,
};
if (!spdk_bdev_is_md_separate(desc->bdev)) {
return -EINVAL;
}
if (!_bdev_io_check_md_buf(&iov, md_buf)) {
return -EINVAL;
}
return _spdk_bdev_read_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks,
cb, cb_arg);
}
int
spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt,
@ -2629,10 +2752,10 @@ spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
return spdk_bdev_readv_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg);
}
int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt,
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg)
static int
_spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks,
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg)
{
struct spdk_bdev *bdev = desc->bdev;
struct spdk_bdev_io *bdev_io;
@ -2652,6 +2775,72 @@ int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *
bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
bdev_io->u.bdev.iovs = iov;
bdev_io->u.bdev.iovcnt = iovcnt;
bdev_io->u.bdev.md_buf = md_buf;
bdev_io->u.bdev.num_blocks = num_blocks;
bdev_io->u.bdev.offset_blocks = offset_blocks;
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
spdk_bdev_io_submit(bdev_io);
return 0;
}
int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt,
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
return _spdk_bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
num_blocks, cb, cb_arg);
}
int
spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, void *md_buf,
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
if (!spdk_bdev_is_md_separate(desc->bdev)) {
return -EINVAL;
}
if (!_bdev_io_check_md_buf(iov, md_buf)) {
return -EINVAL;
}
return _spdk_bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
num_blocks, cb, cb_arg);
}
static int
_spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
void *buf, void *md_buf, uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
struct spdk_bdev *bdev = desc->bdev;
struct spdk_bdev_io *bdev_io;
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
if (!desc->write) {
return -EBADF;
}
if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
return -EINVAL;
}
bdev_io = spdk_bdev_get_io(channel);
if (!bdev_io) {
return -ENOMEM;
}
bdev_io->internal.ch = channel;
bdev_io->internal.desc = desc;
bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
bdev_io->u.bdev.iovs = &bdev_io->iov;
bdev_io->u.bdev.iovs[0].iov_base = buf;
bdev_io->u.bdev.iovs[0].iov_len = num_blocks * bdev->blocklen;
bdev_io->u.bdev.iovcnt = 1;
bdev_io->u.bdev.md_buf = md_buf;
bdev_io->u.bdev.num_blocks = num_blocks;
bdev_io->u.bdev.offset_blocks = offset_blocks;
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
@ -2678,6 +2867,37 @@ int
spdk_bdev_write_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
void *buf, uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
return _spdk_bdev_write_blocks_with_md(desc, ch, buf, NULL, offset_blocks, num_blocks,
cb, cb_arg);
}
int
spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
void *buf, void *md_buf, uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
struct iovec iov = {
.iov_base = buf,
};
if (!spdk_bdev_is_md_separate(desc->bdev)) {
return -EINVAL;
}
if (!_bdev_io_check_md_buf(&iov, md_buf)) {
return -EINVAL;
}
return _spdk_bdev_write_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks,
cb, cb_arg);
}
static int
_spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, void *md_buf,
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
struct spdk_bdev *bdev = desc->bdev;
struct spdk_bdev_io *bdev_io;
@ -2699,10 +2919,9 @@ spdk_bdev_write_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
bdev_io->internal.ch = channel;
bdev_io->internal.desc = desc;
bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
bdev_io->u.bdev.iovs = &bdev_io->iov;
bdev_io->u.bdev.iovs[0].iov_base = buf;
bdev_io->u.bdev.iovs[0].iov_len = num_blocks * bdev->blocklen;
bdev_io->u.bdev.iovcnt = 1;
bdev_io->u.bdev.iovs = iov;
bdev_io->u.bdev.iovcnt = iovcnt;
bdev_io->u.bdev.md_buf = md_buf;
bdev_io->u.bdev.num_blocks = num_blocks;
bdev_io->u.bdev.offset_blocks = offset_blocks;
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
@ -2732,34 +2951,26 @@ spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
struct spdk_bdev *bdev = desc->bdev;
struct spdk_bdev_io *bdev_io;
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
return _spdk_bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
num_blocks, cb, cb_arg);
}
if (!desc->write) {
return -EBADF;
}
if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
int
spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, void *md_buf,
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
if (!spdk_bdev_is_md_separate(desc->bdev)) {
return -EINVAL;
}
bdev_io = spdk_bdev_get_io(channel);
if (!bdev_io) {
return -ENOMEM;
if (!_bdev_io_check_md_buf(iov, md_buf)) {
return -EINVAL;
}
bdev_io->internal.ch = channel;
bdev_io->internal.desc = desc;
bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
bdev_io->u.bdev.iovs = iov;
bdev_io->u.bdev.iovcnt = iovcnt;
bdev_io->u.bdev.num_blocks = num_blocks;
bdev_io->u.bdev.offset_blocks = offset_blocks;
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
spdk_bdev_io_submit(bdev_io);
return 0;
return _spdk_bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
num_blocks, cb, cb_arg);
}
static void
@ -3518,9 +3729,7 @@ spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status sta
return;
}
} else {
if (spdk_unlikely(bdev_io->internal.orig_iovcnt > 0)) {
_bdev_io_unset_bounce_buf(bdev_io);
}
_bdev_io_unset_bounce_buf(bdev_io);
assert(bdev_ch->io_outstanding > 0);
assert(shared_resource->io_outstanding > 0);
@ -4173,6 +4382,25 @@ spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *i
}
}
void *
spdk_bdev_io_get_md_buf(struct spdk_bdev_io *bdev_io)
{
if (bdev_io == NULL) {
return NULL;
}
if (!spdk_bdev_is_md_separate(bdev_io->bdev)) {
return NULL;
}
if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ ||
bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
return bdev_io->u.bdev.md_buf;
}
return NULL;
}
void
spdk_bdev_module_list_add(struct spdk_bdev_module *bdev_module)
{