bdev: add write zeroes split support

The common bdev layer will split large WRITE ZEROES ranges into
multiple child requests based on the backend device's setting
(max_write_zeroes). It submits at most 8 child requests at a time
to avoid flooding the backend with requests.

Also add UT to cover different cases.

Change-Id: Id9505fbe1c297412ef97b1f73587b22bc43f770e
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/7875
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
This commit is contained in:
Changpeng Liu 2021-05-14 18:23:16 +08:00 committed by Tomasz Zawadzki
parent 734de26066
commit e7fbdf15fd
2 changed files with 163 additions and 0 deletions

View File

@ -2026,6 +2026,20 @@ bdev_unmap_should_split(struct spdk_bdev_io *bdev_io)
return false;
}
/*
 * Tell whether a WRITE ZEROES request has to be split into children.
 *
 * A max_write_zeroes of 0 on the bdev disables splitting entirely. Otherwise
 * the request is split only when it covers more blocks than max_write_zeroes.
 */
static bool
bdev_write_zeroes_should_split(struct spdk_bdev_io *bdev_io)
{
	return bdev_io->bdev->max_write_zeroes != 0 &&
	       bdev_io->u.bdev.num_blocks > bdev_io->bdev->max_write_zeroes;
}
static bool
bdev_io_should_split(struct spdk_bdev_io *bdev_io)
{
@ -2035,6 +2049,8 @@ bdev_io_should_split(struct spdk_bdev_io *bdev_io)
return bdev_rw_should_split(bdev_io);
case SPDK_BDEV_IO_TYPE_UNMAP:
return bdev_unmap_should_split(bdev_io);
case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
return bdev_write_zeroes_should_split(bdev_io);
default:
return false;
}
@ -2061,6 +2077,15 @@ _bdev_unmap_split(void *_bdev_io)
return bdev_unmap_split((struct spdk_bdev_io *)_bdev_io);
}
static void
bdev_write_zeroes_split(struct spdk_bdev_io *bdev_io);

/*
 * void-pointer adapter around bdev_write_zeroes_split().
 *
 * bdev_io_split_submit() assigns this function to io_wait_fn for WRITE ZEROES
 * requests; presumably it is called later to resume splitting of the parent
 * I/O - confirm against bdev_io_split_submit().
 *
 * _bdev_io is the parent struct spdk_bdev_io, passed as an opaque pointer.
 */
static void
_bdev_write_zeroes_split(void *_bdev_io)
{
	/* No `return <expr>;` here: ISO C forbids it in a function returning void. */
	bdev_write_zeroes_split((struct spdk_bdev_io *)_bdev_io);
}
static int
bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt, void *md_buf,
uint64_t num_blocks, uint64_t *offset, uint64_t *remaining)
@ -2097,6 +2122,13 @@ bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt
current_offset, num_blocks,
bdev_io_split_done, bdev_io);
break;
case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
io_wait_fn = _bdev_write_zeroes_split;
rc = spdk_bdev_write_zeroes_blocks(bdev_io->internal.desc,
spdk_io_channel_from_ctx(bdev_io->internal.ch),
current_offset, num_blocks,
bdev_io_split_done, bdev_io);
break;
default:
assert(false);
rc = -EINVAL;
@ -2292,6 +2324,29 @@ bdev_unmap_split(struct spdk_bdev_io *bdev_io)
}
}
/*
 * Submit the next batch of WRITE ZEROES children for a split parent I/O.
 *
 * Starts from the parent's saved split position (split_current_offset_blocks /
 * split_remaining_num_blocks) and issues children of at most max_write_zeroes
 * blocks each. Stops when nothing remains, when
 * SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS children have been submitted
 * by this call, or as soon as a submission returns non-zero.
 */
static void
bdev_write_zeroes_split(struct spdk_bdev_io *bdev_io)
{
	uint64_t cur_offset = bdev_io->u.bdev.split_current_offset_blocks;
	uint64_t blocks_left = bdev_io->u.bdev.split_remaining_num_blocks;
	uint64_t chunk_blocks;
	uint32_t submitted;
	int rc;

	for (submitted = 0;
	     blocks_left && (submitted < SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS);
	     submitted++) {
		chunk_blocks = spdk_min(blocks_left, bdev_io->bdev->max_write_zeroes);

		/*
		 * cur_offset and blocks_left are passed by pointer; presumably
		 * bdev_io_split_submit() advances them for the next child -
		 * confirm against its definition.
		 */
		rc = bdev_io_split_submit(bdev_io, NULL, 0, NULL, chunk_blocks,
					  &cur_offset, &blocks_left);
		if (!spdk_likely(rc == 0)) {
			/* Submission failed; nothing more is submitted from here. */
			break;
		}
	}
}
static void
bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
@ -2335,6 +2390,9 @@ bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
case SPDK_BDEV_IO_TYPE_UNMAP:
bdev_unmap_split(parent_io);
break;
case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
bdev_write_zeroes_split(parent_io);
break;
default:
assert(false);
break;
@ -2366,6 +2424,9 @@ bdev_io_split(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
case SPDK_BDEV_IO_TYPE_UNMAP:
bdev_unmap_split(bdev_io);
break;
case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
bdev_write_zeroes_split(bdev_io);
break;
default:
assert(false);
break;

View File

@ -4567,6 +4567,107 @@ bdev_unmap(void)
poll_threads();
}
/*
 * Unit test for WRITE ZEROES splitting in the common bdev layer.
 *
 * Case 1: a request submitted before the test sets max_write_zeroes is
 *         expected to reach the backend as a single I/O.
 * Case 2: with max_write_zeroes = 8, a 16-block request is split into
 *         2 children.
 * Case 3: a request needing 15 children is submitted in batches of at most
 *         SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS outstanding children.
 */
static void
bdev_write_zeroes_split_test(void)
{
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc = NULL;
	struct spdk_io_channel *ioch;
	struct spdk_bdev_channel *bdev_ch;
	struct ut_expected_io *expected_io;
	struct spdk_bdev_opts bdev_opts = {};
	uint32_t i, num_outstanding;
	uint64_t offset, num_blocks, max_write_zeroes_blocks, num_children;
	int rc;

	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
	bdev_opts.bdev_io_pool_size = 512;
	bdev_opts.bdev_io_cache_size = 64;
	rc = spdk_bdev_set_opts(&bdev_opts);
	CU_ASSERT(rc == 0);

	spdk_bdev_initialize(bdev_init_cb, NULL);
	bdev = allocate_bdev("bdev");

	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
	CU_ASSERT_EQUAL(rc, 0);
	SPDK_CU_ASSERT_FATAL(desc != NULL);
	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
	ioch = spdk_bdev_get_io_channel(desc);
	SPDK_CU_ASSERT_FATAL(ioch != NULL);
	bdev_ch = spdk_io_channel_get_ctx(ioch);
	CU_ASSERT(TAILQ_EMPTY(&bdev_ch->io_submitted));

	fn_table.submit_request = stub_submit_request;
	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;

	/* Case 1: First test the request won't be split */
	/*
	 * NOTE(review): this relies on allocate_bdev() leaving max_write_zeroes
	 * at a value that does not trigger a split for 32 blocks (presumably 0)
	 * - confirm against allocate_bdev().
	 */
	num_blocks = 32;

	g_io_done = false;
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, 0, num_blocks, 0);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
	rc = spdk_bdev_write_zeroes_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
	CU_ASSERT_EQUAL(rc, 0);
	CU_ASSERT(g_io_done == false);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	stub_complete_io(1);
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);

	/* Case 2: Test the split with 2 children requests */
	max_write_zeroes_blocks = 8;
	bdev->max_write_zeroes = max_write_zeroes_blocks;
	num_blocks = max_write_zeroes_blocks * 2;
	offset = 0;

	g_io_done = false;
	/* Each child is expected to cover exactly max_write_zeroes_blocks, back to back. */
	for (i = 0; i < 2; i++) {
		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, offset, max_write_zeroes_blocks,
						   0);
		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
		offset += max_write_zeroes_blocks;
	}

	rc = spdk_bdev_write_zeroes_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
	CU_ASSERT_EQUAL(rc, 0);
	CU_ASSERT(g_io_done == false);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
	stub_complete_io(2);
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);

	/* Case 3: Test the split with 15 children requests, will finish 8 requests first */
	num_children = 15;
	num_blocks = max_write_zeroes_blocks * num_children;
	g_io_done = false;
	offset = 0;
	for (i = 0; i < num_children; i++) {
		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, offset, max_write_zeroes_blocks,
						   0);
		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
		offset += max_write_zeroes_blocks;
	}

	rc = spdk_bdev_write_zeroes_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
	CU_ASSERT_EQUAL(rc, 0);
	CU_ASSERT(g_io_done == false);

	/*
	 * Drain batch by batch: before each completion round the number of
	 * outstanding children must equal the per-batch cap, or whatever is
	 * left for the final batch.
	 */
	while (num_children > 0) {
		num_outstanding = spdk_min(num_children, SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS);
		CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == num_outstanding);
		stub_complete_io(num_outstanding);
		num_children -= num_outstanding;
	}
	CU_ASSERT(g_io_done == true);
	/* As in cases 1 and 2, nothing may be left outstanding once the parent is done. */
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);

	spdk_put_io_channel(ioch);
	spdk_bdev_close(desc);
	free_bdev(bdev);
	spdk_bdev_finish(bdev_fini_cb, NULL);
	poll_threads();
}
static void
bdev_set_options_test(void)
{
@ -4712,6 +4813,7 @@ main(int argc, char **argv)
CU_ADD_TEST(suite, lock_lba_range_overlapped);
CU_ADD_TEST(suite, bdev_io_abort);
CU_ADD_TEST(suite, bdev_unmap);
CU_ADD_TEST(suite, bdev_write_zeroes_split_test);
CU_ADD_TEST(suite, bdev_set_options_test);
CU_ADD_TEST(suite, bdev_multi_allocation);