diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h index e4c8070f8f..a7106a467a 100644 --- a/include/spdk/bdev_module.h +++ b/include/spdk/bdev_module.h @@ -254,8 +254,11 @@ struct spdk_bdev { /** * Specifies whether the optimal_io_boundary is mandatory or * only advisory. If set to true, the bdev layer will split - * I/O that span the optimal_io_boundary before submitting them - * to the bdev module. + * READ and WRITE I/O that span the optimal_io_boundary before + * submitting them to the bdev module. + * + * Note that this field cannot be used to force splitting of + * UNMAP, WRITE_ZEROES or FLUSH I/O. */ bool split_on_optimal_io_boundary; diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index 01a561741d..64b621bf55 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -1056,20 +1056,21 @@ _spdk_bdev_io_type_can_split(uint8_t type) assert(type != SPDK_BDEV_IO_TYPE_INVALID); assert(type < SPDK_BDEV_NUM_IO_TYPES); - switch (type) { - case SPDK_BDEV_IO_TYPE_RESET: - case SPDK_BDEV_IO_TYPE_NVME_ADMIN: - case SPDK_BDEV_IO_TYPE_NVME_IO: - case SPDK_BDEV_IO_TYPE_NVME_IO_MD: - /* These types of bdev_io do not specify an LBA offset/length. */ - return false; - default: + /* Only split READ and WRITE I/O. Theoretically other types of I/O like + * UNMAP could be split, but these types of I/O are typically much larger + * in size (sometimes the size of the entire block device), and the bdev + * module can more efficiently split these types of I/O. Plus those types + * of I/O do not have a payload, which makes the splitting process simpler. + */ + if (type == SPDK_BDEV_IO_TYPE_READ || type == SPDK_BDEV_IO_TYPE_WRITE) { return true; + } else { + return false; } } static bool -_spdk_bdev_io_spans_boundary(struct spdk_bdev_io *bdev_io) +_spdk_bdev_io_should_split(struct spdk_bdev_io *bdev_io) { uint64_t start_stripe, end_stripe; uint32_t io_boundary = bdev_io->bdev->optimal_io_boundary; @@ -1180,51 +1181,6 @@ _spdk_bdev_io_split_with_payload(void *_bdev_io) } } -static void -_spdk_bdev_io_split_no_payload(void *_bdev_io) -{ - struct spdk_bdev_io *bdev_io = _bdev_io; - uint64_t current_offset, remaining; - uint32_t to_next_boundary; - int rc; - - remaining = bdev_io->u.bdev.split_remaining_num_blocks; - current_offset = bdev_io->u.bdev.split_current_offset_blocks; - - to_next_boundary = _to_next_boundary(current_offset, bdev_io->bdev->optimal_io_boundary); - to_next_boundary = spdk_min(remaining, to_next_boundary); - - if (bdev_io->type == SPDK_BDEV_IO_TYPE_UNMAP) { - rc = spdk_bdev_unmap_blocks(bdev_io->internal.desc, - spdk_io_channel_from_ctx(bdev_io->internal.ch), - current_offset, to_next_boundary, - _spdk_bdev_io_split_done, bdev_io); - } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE_ZEROES) { - rc = spdk_bdev_write_zeroes_blocks(bdev_io->internal.desc, - spdk_io_channel_from_ctx(bdev_io->internal.ch), - current_offset, to_next_boundary, - _spdk_bdev_io_split_done, bdev_io); - } else { - assert(bdev_io->type == SPDK_BDEV_IO_TYPE_FLUSH); - rc = spdk_bdev_flush_blocks(bdev_io->internal.desc, - spdk_io_channel_from_ctx(bdev_io->internal.ch), - current_offset, to_next_boundary, - _spdk_bdev_io_split_done, bdev_io); - } - - if (rc == 0) { - bdev_io->u.bdev.split_current_offset_blocks += to_next_boundary; - bdev_io->u.bdev.split_remaining_num_blocks -= to_next_boundary; - } else { - assert(rc == -ENOMEM); - bdev_io->internal.waitq_entry.bdev = bdev_io->bdev; - bdev_io->internal.waitq_entry.cb_fn = _spdk_bdev_io_split_with_payload; - bdev_io->internal.waitq_entry.cb_arg = bdev_io; - spdk_bdev_queue_io_wait(bdev_io->bdev, spdk_io_channel_from_ctx(bdev_io->internal.ch), - &bdev_io->internal.waitq_entry); - } -} - static void _spdk_bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { @@ -1248,11 +1204,7 @@ _spdk_bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_ar * Continue with the splitting process. This function will complete the parent I/O if the * splitting is done. */ - if (parent_io->type == SPDK_BDEV_IO_TYPE_READ || parent_io->type == SPDK_BDEV_IO_TYPE_WRITE) { - _spdk_bdev_io_split_with_payload(parent_io); - } else { - _spdk_bdev_io_split_no_payload(parent_io); - } + _spdk_bdev_io_split_with_payload(parent_io); } static void @@ -1263,11 +1215,7 @@ _spdk_bdev_io_split(struct spdk_bdev_io *bdev_io) bdev_io->u.bdev.split_current_offset_blocks = bdev_io->u.bdev.offset_blocks; bdev_io->u.bdev.split_remaining_num_blocks = bdev_io->u.bdev.num_blocks; - if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ || bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) { - _spdk_bdev_io_split_with_payload(bdev_io); - } else { - _spdk_bdev_io_split_no_payload(bdev_io); - } + _spdk_bdev_io_split_with_payload(bdev_io); } static void @@ -1314,7 +1262,7 @@ spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io) assert(thread != NULL); assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_PENDING); - if (bdev->split_on_optimal_io_boundary && _spdk_bdev_io_spans_boundary(bdev_io)) { + if (bdev->split_on_optimal_io_boundary && _spdk_bdev_io_should_split(bdev_io)) { _spdk_bdev_io_split(bdev_io); return; } diff --git a/test/unit/lib/bdev/bdev.c/bdev_ut.c b/test/unit/lib/bdev/bdev.c/bdev_ut.c index df011bbc15..9ccf1bc586 100644 --- a/test/unit/lib/bdev/bdev.c/bdev_ut.c +++ b/test/unit/lib/bdev/bdev.c/bdev_ut.c @@ -747,25 +747,25 @@ bdev_io_spans_boundary_test(void) bdev_io.bdev = &bdev; /* bdev has no optimal_io_boundary set - so this should return false. */ - CU_ASSERT(_spdk_bdev_io_spans_boundary(&bdev_io) == false); + CU_ASSERT(_spdk_bdev_io_should_split(&bdev_io) == false); bdev.optimal_io_boundary = 32; bdev_io.type = SPDK_BDEV_IO_TYPE_RESET; /* RESETs are not based on LBAs - so this should return false. */ - CU_ASSERT(_spdk_bdev_io_spans_boundary(&bdev_io) == false); + CU_ASSERT(_spdk_bdev_io_should_split(&bdev_io) == false); bdev_io.type = SPDK_BDEV_IO_TYPE_READ; bdev_io.u.bdev.offset_blocks = 0; bdev_io.u.bdev.num_blocks = 32; /* This I/O run right up to, but does not cross, the boundary - so this should return false. */ - CU_ASSERT(_spdk_bdev_io_spans_boundary(&bdev_io) == false); + CU_ASSERT(_spdk_bdev_io_should_split(&bdev_io) == false); bdev_io.u.bdev.num_blocks = 33; /* This I/O spans a boundary. */ - CU_ASSERT(_spdk_bdev_io_spans_boundary(&bdev_io) == true); + CU_ASSERT(_spdk_bdev_io_should_split(&bdev_io) == true); } static void @@ -907,81 +907,49 @@ bdev_io_split(void) stub_complete_io(1); CU_ASSERT(g_io_done == true); - /* Test a WRITE_ZEROES that needs to be split. This is an I/O type that does not have iovecs. - * Have this I/O end right on a boundary. Use a non-standard optimal_io_boundary to test the - * non-power-of-2 path. + /* Test a WRITE_ZEROES that would span an I/O boundary. WRITE_ZEROES should not be + * split, so test that. */ bdev->optimal_io_boundary = 15; g_io_done = false; g_bdev_ut_channel->expected_iotype = SPDK_BDEV_IO_TYPE_WRITE_ZEROES; g_bdev_ut_channel->expected_offset = 9; - g_bdev_ut_channel->expected_length = 6; + g_bdev_ut_channel->expected_length = 36; g_bdev_ut_channel->expected_iovcnt = 0; rc = spdk_bdev_write_zeroes_blocks(desc, io_ch, 9, 36, io_done, NULL); CU_ASSERT(rc == 0); CU_ASSERT(g_io_done == false); - - g_bdev_ut_channel->expected_offset = 15; - g_bdev_ut_channel->expected_length = 15; - - CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1); - stub_complete_io(1); - CU_ASSERT(g_io_done == false); - - g_bdev_ut_channel->expected_offset = 30; - g_bdev_ut_channel->expected_length = 15; - - CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1); - stub_complete_io(1); - CU_ASSERT(g_io_done == false); - CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1); stub_complete_io(1); CU_ASSERT(g_io_done == true); - /* Test an UNMAP that needs to be split. This is an I/O type that does not have iovecs. */ + /* Test an UNMAP. This should also not be split. */ bdev->optimal_io_boundary = 16; g_io_done = false; g_bdev_ut_channel->expected_iotype = SPDK_BDEV_IO_TYPE_UNMAP; g_bdev_ut_channel->expected_offset = 15; - g_bdev_ut_channel->expected_length = 1; + g_bdev_ut_channel->expected_length = 2; g_bdev_ut_channel->expected_iovcnt = 0; rc = spdk_bdev_unmap_blocks(desc, io_ch, 15, 2, io_done, NULL); CU_ASSERT(rc == 0); CU_ASSERT(g_io_done == false); - - g_bdev_ut_channel->expected_offset = 16; - g_bdev_ut_channel->expected_length = 1; - - CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1); - stub_complete_io(1); - CU_ASSERT(g_io_done == false); - CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1); stub_complete_io(1); CU_ASSERT(g_io_done == true); - /* Test an FLUSH that needs to be split. This is an I/O type that does not have iovecs. */ + /* Test a FLUSH. This should also not be split. */ bdev->optimal_io_boundary = 16; g_io_done = false; g_bdev_ut_channel->expected_iotype = SPDK_BDEV_IO_TYPE_FLUSH; g_bdev_ut_channel->expected_offset = 15; - g_bdev_ut_channel->expected_length = 1; + g_bdev_ut_channel->expected_length = 2; g_bdev_ut_channel->expected_iovcnt = 0; rc = spdk_bdev_flush_blocks(desc, io_ch, 15, 2, io_done, NULL); CU_ASSERT(rc == 0); CU_ASSERT(g_io_done == false); - - g_bdev_ut_channel->expected_offset = 16; - g_bdev_ut_channel->expected_length = 1; - - CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1); - stub_complete_io(1); - CU_ASSERT(g_io_done == false); - CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1); stub_complete_io(1); CU_ASSERT(g_io_done == true);