From c1f1a876aa1def777725a87f565153db8e027aa5 Mon Sep 17 00:00:00 2001 From: Piotr Pelplinski Date: Fri, 12 Oct 2018 09:46:14 +0200 Subject: [PATCH] bdev: double buffering for unaligned buffers Now, that _spdk_bdev_io_get_buf offers allocating aligned buffers, add possibility to store original buffer and replace it by aligned one for the time of IO. Signed-off-by: Piotr Pelplinski Change-Id: If0ed306175631613c0f9310dccaae6615364fb49 Reviewed-on: https://review.gerrithub.io/429754 Chandler-Test-Pool: SPDK Automated Test System Tested-by: SPDK CI Jenkins Reviewed-by: Jim Harris Reviewed-by: Shuhei Matsumoto Reviewed-by: Ben Walker --- include/spdk/bdev_module.h | 17 ++++-- lib/bdev/bdev.c | 122 ++++++++++++++++++++++++++++++++++--- 2 files changed, 128 insertions(+), 11 deletions(-) diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h index 9327e8f810..bfc4013c0d 100644 --- a/include/spdk/bdev_module.h +++ b/include/spdk/bdev_module.h @@ -489,6 +489,11 @@ struct spdk_bdev_io { /** requested size of the buffer associated with this I/O */ uint64_t buf_len; + /** if the request is double buffered, store original request iovs here */ + struct iovec bounce_iov; + struct iovec *orig_iovs; + int orig_iovcnt; + /** Callback for when buf is allocated */ spdk_bdev_io_get_buf_cb get_buf_cb; @@ -650,10 +655,14 @@ const struct spdk_bdev_aliases_list *spdk_bdev_get_aliases(const struct spdk_bde /** * Allocate a buffer for given bdev_io. Allocation will happen - * only if the bdev_io has no assigned SGL yet. The buffer will be - * freed automatically on \c spdk_bdev_free_io() call. This call - * will never fail - in case of lack of memory given callback \c cb - * will be deferred until enough memory is freed. + * only if the bdev_io has no assigned SGL yet or SGL is not + * aligned to \c bdev->required_alignment. 
If SGL is not aligned, + * this call will cause copy from SGL to bounce buffer on write + * path or copy from bounce buffer to SGL before completion + * callback on read path. The buffer will be freed automatically + * on \c spdk_bdev_free_io() call. This call will never fail. + * In case of lack of memory given callback \c cb will be deferred + * until enough memory is freed. * * \param bdev_io I/O to allocate buffer for. * \param cb callback to be called when the buffer is allocated diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index f41ebfa98d..d7a5dea232 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -411,6 +411,78 @@ spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len) iovs[0].iov_len = len; } +static bool +_is_buf_allocated(struct iovec *iovs) +{ + return iovs[0].iov_base != NULL; +} + +static bool +_are_iovs_aligned(struct iovec *iovs, int iovcnt, uint32_t alignment) +{ + int i; + uintptr_t iov_base; + + if (spdk_likely(alignment == 1)) { + return true; + } + + for (i = 0; i < iovcnt; i++) { + iov_base = (uintptr_t)iovs[i].iov_base; + if ((iov_base & (alignment - 1)) != 0) { + return false; + } + } + + return true; +} + +static void +_copy_iovs_to_buf(void *buf, size_t buf_len, struct iovec *iovs, int iovcnt) +{ + int i; + size_t len; + + for (i = 0; i < iovcnt; i++) { + len = spdk_min(iovs[i].iov_len, buf_len); + memcpy(buf, iovs[i].iov_base, len); + buf += len; + buf_len -= len; + } +} + +static void +_copy_buf_to_iovs(struct iovec *iovs, int iovcnt, void *buf, size_t buf_len) +{ + int i; + size_t len; + + for (i = 0; i < iovcnt; i++) { + len = spdk_min(iovs[i].iov_len, buf_len); + memcpy(iovs[i].iov_base, buf, len); + buf += len; + buf_len -= len; + } +} + +static void +_bdev_io_set_bounce_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len) +{ + /* save original iovec */ + bdev_io->internal.orig_iovs = bdev_io->u.bdev.iovs; + bdev_io->internal.orig_iovcnt = bdev_io->u.bdev.iovcnt; + /* set bounce iov */ + 
bdev_io->u.bdev.iovs = &bdev_io->internal.bounce_iov; + bdev_io->u.bdev.iovcnt = 1; + /* set bounce buffer for this operation */ + bdev_io->u.bdev.iovs[0].iov_base = buf; + bdev_io->u.bdev.iovs[0].iov_len = len; + /* if this is write path, copy data from original buffer to bounce buffer */ + if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) { + _copy_iovs_to_buf(buf, len, bdev_io->internal.orig_iovs, bdev_io->internal.orig_iovcnt); + } +} + static void spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io) { @@ -421,8 +493,7 @@ spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io) struct spdk_bdev_mgmt_channel *ch; uint64_t buf_len; uint64_t alignment; - - assert(bdev_io->u.bdev.iovcnt == 1); + bool buf_allocated; buf = bdev_io->internal.buf; buf_len = bdev_io->internal.buf_len; @@ -445,9 +516,15 @@ spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io) tmp = STAILQ_FIRST(stailq); alignment = spdk_bdev_get_buf_align(tmp->bdev); + buf_allocated = _is_buf_allocated(tmp->u.bdev.iovs); + aligned_buf = (void *)(((uintptr_t)buf + (alignment - 1)) & ~(alignment - 1)); - spdk_bdev_io_set_buf(tmp, aligned_buf, tmp->internal.buf_len); + if (buf_allocated) { + _bdev_io_set_bounce_buf(tmp, aligned_buf, tmp->internal.buf_len); + } else { + spdk_bdev_io_set_buf(tmp, aligned_buf, tmp->internal.buf_len); + } STAILQ_REMOVE_HEAD(stailq, internal.buf_link); tmp->internal.buf = buf; @@ -455,6 +532,25 @@ spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io) } } +static void +_bdev_io_unset_bounce_buf(struct spdk_bdev_io *bdev_io) +{ + /* if this is read path, copy data from bounce buffer to original buffer */ + if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ && + bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS) { + _copy_buf_to_iovs(bdev_io->internal.orig_iovs, bdev_io->internal.orig_iovcnt, + bdev_io->internal.bounce_iov.iov_base, bdev_io->internal.bounce_iov.iov_len); + } + /* set original buffer for this io */ + bdev_io->u.bdev.iovcnt = bdev_io->internal.orig_iovcnt; + bdev_io->u.bdev.iovs = 
bdev_io->internal.orig_iovs; + /* disable bouncing buffer for this io */ + bdev_io->internal.orig_iovcnt = 0; + bdev_io->internal.orig_iovs = NULL; + /* return bounce buffer to the pool */ + spdk_bdev_io_put_buf(bdev_io); +} + void spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, uint64_t len) { @@ -463,14 +559,17 @@ spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, u void *buf, *aligned_buf; struct spdk_bdev_mgmt_channel *mgmt_ch; uint64_t alignment; + bool buf_allocated; assert(cb != NULL); assert(bdev_io->u.bdev.iovs != NULL); alignment = spdk_bdev_get_buf_align(bdev_io->bdev); + buf_allocated = _is_buf_allocated(bdev_io->u.bdev.iovs); - if (spdk_unlikely(bdev_io->u.bdev.iovs[0].iov_base != NULL)) { - /* Buffer already present */ + if (buf_allocated && + _are_iovs_aligned(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, alignment)) { + /* Buffer already present and aligned */ cb(bdev_io->internal.ch->channel, bdev_io); return; } @@ -496,8 +595,11 @@ spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, u } else { aligned_buf = (void *)(((uintptr_t)buf + (alignment - 1)) & ~(alignment - 1)); - spdk_bdev_io_set_buf(bdev_io, aligned_buf, len); - + if (buf_allocated) { + _bdev_io_set_bounce_buf(bdev_io, aligned_buf, len); + } else { + spdk_bdev_io_set_buf(bdev_io, aligned_buf, len); + } bdev_io->internal.buf = buf; bdev_io->internal.get_buf_cb(bdev_io->internal.ch->channel, bdev_io); } @@ -1499,6 +1601,8 @@ spdk_bdev_io_init(struct spdk_bdev_io *bdev_io, bdev_io->internal.in_submit_request = false; bdev_io->internal.buf = NULL; bdev_io->internal.io_submit_ch = NULL; + bdev_io->internal.orig_iovs = NULL; + bdev_io->internal.orig_iovcnt = 0; } static bool @@ -3020,6 +3124,10 @@ spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status sta return; } } else { + if (spdk_unlikely(bdev_io->internal.orig_iovcnt > 0)) { + _bdev_io_unset_bounce_buf(bdev_io); + } + 
assert(bdev_ch->io_outstanding > 0); assert(shared_resource->io_outstanding > 0); bdev_ch->io_outstanding--;