bdev: only apply split_on_optimal_io_boundary to R/W
Splitting a 1TB unmap into individual 64KB unmap commands (for a RAID volume with 64KB strip size) would be awful - the RAID module can be much smarter about this. So back out the changes for splitting I/O without payload. Signed-off-by: Jim Harris <james.r.harris@intel.com> Change-Id: I24fe6d911f4e3c9db4b2cb5d66c7236a5596e0d9 Reviewed-on: https://review.gerrithub.io/424103 Chandler-Test-Pool: SPDK Automated Test System <sys_sgsw@intel.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com> Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
This commit is contained in:
parent
eb6a2cb8f5
commit
4f860d7e40
@ -254,8 +254,11 @@ struct spdk_bdev {
|
||||
/**
|
||||
* Specifies whether the optimal_io_boundary is mandatory or
|
||||
* only advisory. If set to true, the bdev layer will split
|
||||
* I/O that span the optimal_io_boundary before submitting them
|
||||
* to the bdev module.
|
||||
* READ and WRITE I/O that span the optimal_io_boundary before
|
||||
* submitting them to the bdev module.
|
||||
*
|
||||
* Note that this field cannot be used to force splitting of
|
||||
* UNMAP, WRITE_ZEROES or FLUSH I/O.
|
||||
*/
|
||||
bool split_on_optimal_io_boundary;
|
||||
|
||||
|
@ -1056,20 +1056,21 @@ _spdk_bdev_io_type_can_split(uint8_t type)
|
||||
assert(type != SPDK_BDEV_IO_TYPE_INVALID);
|
||||
assert(type < SPDK_BDEV_NUM_IO_TYPES);
|
||||
|
||||
switch (type) {
|
||||
case SPDK_BDEV_IO_TYPE_RESET:
|
||||
case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
|
||||
case SPDK_BDEV_IO_TYPE_NVME_IO:
|
||||
case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
|
||||
/* These types of bdev_io do not specify an LBA offset/length. */
|
||||
return false;
|
||||
default:
|
||||
/* Only split READ and WRITE I/O. Theoretically other types of I/O like
|
||||
* UNMAP could be split, but these types of I/O are typically much larger
|
||||
* in size (sometimes the size of the entire block device), and the bdev
|
||||
* module can more efficiently split these types of I/O. Plus those types
|
||||
* of I/O do not have a payload, which makes the splitting process simpler.
|
||||
*/
|
||||
if (type == SPDK_BDEV_IO_TYPE_READ || type == SPDK_BDEV_IO_TYPE_WRITE) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
_spdk_bdev_io_spans_boundary(struct spdk_bdev_io *bdev_io)
|
||||
_spdk_bdev_io_should_split(struct spdk_bdev_io *bdev_io)
|
||||
{
|
||||
uint64_t start_stripe, end_stripe;
|
||||
uint32_t io_boundary = bdev_io->bdev->optimal_io_boundary;
|
||||
@ -1180,51 +1181,6 @@ _spdk_bdev_io_split_with_payload(void *_bdev_io)
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
_spdk_bdev_io_split_no_payload(void *_bdev_io)
|
||||
{
|
||||
struct spdk_bdev_io *bdev_io = _bdev_io;
|
||||
uint64_t current_offset, remaining;
|
||||
uint32_t to_next_boundary;
|
||||
int rc;
|
||||
|
||||
remaining = bdev_io->u.bdev.split_remaining_num_blocks;
|
||||
current_offset = bdev_io->u.bdev.split_current_offset_blocks;
|
||||
|
||||
to_next_boundary = _to_next_boundary(current_offset, bdev_io->bdev->optimal_io_boundary);
|
||||
to_next_boundary = spdk_min(remaining, to_next_boundary);
|
||||
|
||||
if (bdev_io->type == SPDK_BDEV_IO_TYPE_UNMAP) {
|
||||
rc = spdk_bdev_unmap_blocks(bdev_io->internal.desc,
|
||||
spdk_io_channel_from_ctx(bdev_io->internal.ch),
|
||||
current_offset, to_next_boundary,
|
||||
_spdk_bdev_io_split_done, bdev_io);
|
||||
} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE_ZEROES) {
|
||||
rc = spdk_bdev_write_zeroes_blocks(bdev_io->internal.desc,
|
||||
spdk_io_channel_from_ctx(bdev_io->internal.ch),
|
||||
current_offset, to_next_boundary,
|
||||
_spdk_bdev_io_split_done, bdev_io);
|
||||
} else {
|
||||
assert(bdev_io->type == SPDK_BDEV_IO_TYPE_FLUSH);
|
||||
rc = spdk_bdev_flush_blocks(bdev_io->internal.desc,
|
||||
spdk_io_channel_from_ctx(bdev_io->internal.ch),
|
||||
current_offset, to_next_boundary,
|
||||
_spdk_bdev_io_split_done, bdev_io);
|
||||
}
|
||||
|
||||
if (rc == 0) {
|
||||
bdev_io->u.bdev.split_current_offset_blocks += to_next_boundary;
|
||||
bdev_io->u.bdev.split_remaining_num_blocks -= to_next_boundary;
|
||||
} else {
|
||||
assert(rc == -ENOMEM);
|
||||
bdev_io->internal.waitq_entry.bdev = bdev_io->bdev;
|
||||
bdev_io->internal.waitq_entry.cb_fn = _spdk_bdev_io_split_with_payload;
|
||||
bdev_io->internal.waitq_entry.cb_arg = bdev_io;
|
||||
spdk_bdev_queue_io_wait(bdev_io->bdev, spdk_io_channel_from_ctx(bdev_io->internal.ch),
|
||||
&bdev_io->internal.waitq_entry);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
_spdk_bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
|
||||
{
|
||||
@ -1248,11 +1204,7 @@ _spdk_bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_ar
|
||||
* Continue with the splitting process. This function will complete the parent I/O if the
|
||||
* splitting is done.
|
||||
*/
|
||||
if (parent_io->type == SPDK_BDEV_IO_TYPE_READ || parent_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
|
||||
_spdk_bdev_io_split_with_payload(parent_io);
|
||||
} else {
|
||||
_spdk_bdev_io_split_no_payload(parent_io);
|
||||
}
|
||||
_spdk_bdev_io_split_with_payload(parent_io);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -1263,11 +1215,7 @@ _spdk_bdev_io_split(struct spdk_bdev_io *bdev_io)
|
||||
bdev_io->u.bdev.split_current_offset_blocks = bdev_io->u.bdev.offset_blocks;
|
||||
bdev_io->u.bdev.split_remaining_num_blocks = bdev_io->u.bdev.num_blocks;
|
||||
|
||||
if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ || bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
|
||||
_spdk_bdev_io_split_with_payload(bdev_io);
|
||||
} else {
|
||||
_spdk_bdev_io_split_no_payload(bdev_io);
|
||||
}
|
||||
_spdk_bdev_io_split_with_payload(bdev_io);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -1314,7 +1262,7 @@ spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io)
|
||||
assert(thread != NULL);
|
||||
assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_PENDING);
|
||||
|
||||
if (bdev->split_on_optimal_io_boundary && _spdk_bdev_io_spans_boundary(bdev_io)) {
|
||||
if (bdev->split_on_optimal_io_boundary && _spdk_bdev_io_should_split(bdev_io)) {
|
||||
_spdk_bdev_io_split(bdev_io);
|
||||
return;
|
||||
}
|
||||
|
@ -747,25 +747,25 @@ bdev_io_spans_boundary_test(void)
|
||||
bdev_io.bdev = &bdev;
|
||||
|
||||
/* bdev has no optimal_io_boundary set - so this should return false. */
|
||||
CU_ASSERT(_spdk_bdev_io_spans_boundary(&bdev_io) == false);
|
||||
CU_ASSERT(_spdk_bdev_io_should_split(&bdev_io) == false);
|
||||
|
||||
bdev.optimal_io_boundary = 32;
|
||||
bdev_io.type = SPDK_BDEV_IO_TYPE_RESET;
|
||||
|
||||
/* RESETs are not based on LBAs - so this should return false. */
|
||||
CU_ASSERT(_spdk_bdev_io_spans_boundary(&bdev_io) == false);
|
||||
CU_ASSERT(_spdk_bdev_io_should_split(&bdev_io) == false);
|
||||
|
||||
bdev_io.type = SPDK_BDEV_IO_TYPE_READ;
|
||||
bdev_io.u.bdev.offset_blocks = 0;
|
||||
bdev_io.u.bdev.num_blocks = 32;
|
||||
|
||||
/* This I/O run right up to, but does not cross, the boundary - so this should return false. */
|
||||
CU_ASSERT(_spdk_bdev_io_spans_boundary(&bdev_io) == false);
|
||||
CU_ASSERT(_spdk_bdev_io_should_split(&bdev_io) == false);
|
||||
|
||||
bdev_io.u.bdev.num_blocks = 33;
|
||||
|
||||
/* This I/O spans a boundary. */
|
||||
CU_ASSERT(_spdk_bdev_io_spans_boundary(&bdev_io) == true);
|
||||
CU_ASSERT(_spdk_bdev_io_should_split(&bdev_io) == true);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -907,81 +907,49 @@ bdev_io_split(void)
|
||||
stub_complete_io(1);
|
||||
CU_ASSERT(g_io_done == true);
|
||||
|
||||
/* Test a WRITE_ZEROES that needs to be split. This is an I/O type that does not have iovecs.
|
||||
* Have this I/O end right on a boundary. Use a non-standard optimal_io_boundary to test the
|
||||
* non-power-of-2 path.
|
||||
/* Test a WRITE_ZEROES that would span an I/O boundary. WRITE_ZEROES should not be
|
||||
* split, so test that.
|
||||
*/
|
||||
bdev->optimal_io_boundary = 15;
|
||||
g_io_done = false;
|
||||
g_bdev_ut_channel->expected_iotype = SPDK_BDEV_IO_TYPE_WRITE_ZEROES;
|
||||
g_bdev_ut_channel->expected_offset = 9;
|
||||
g_bdev_ut_channel->expected_length = 6;
|
||||
g_bdev_ut_channel->expected_length = 36;
|
||||
g_bdev_ut_channel->expected_iovcnt = 0;
|
||||
|
||||
rc = spdk_bdev_write_zeroes_blocks(desc, io_ch, 9, 36, io_done, NULL);
|
||||
CU_ASSERT(rc == 0);
|
||||
CU_ASSERT(g_io_done == false);
|
||||
|
||||
g_bdev_ut_channel->expected_offset = 15;
|
||||
g_bdev_ut_channel->expected_length = 15;
|
||||
|
||||
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
|
||||
stub_complete_io(1);
|
||||
CU_ASSERT(g_io_done == false);
|
||||
|
||||
g_bdev_ut_channel->expected_offset = 30;
|
||||
g_bdev_ut_channel->expected_length = 15;
|
||||
|
||||
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
|
||||
stub_complete_io(1);
|
||||
CU_ASSERT(g_io_done == false);
|
||||
|
||||
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
|
||||
stub_complete_io(1);
|
||||
CU_ASSERT(g_io_done == true);
|
||||
|
||||
/* Test an UNMAP that needs to be split. This is an I/O type that does not have iovecs. */
|
||||
/* Test an UNMAP. This should also not be split. */
|
||||
bdev->optimal_io_boundary = 16;
|
||||
g_io_done = false;
|
||||
g_bdev_ut_channel->expected_iotype = SPDK_BDEV_IO_TYPE_UNMAP;
|
||||
g_bdev_ut_channel->expected_offset = 15;
|
||||
g_bdev_ut_channel->expected_length = 1;
|
||||
g_bdev_ut_channel->expected_length = 2;
|
||||
g_bdev_ut_channel->expected_iovcnt = 0;
|
||||
|
||||
rc = spdk_bdev_unmap_blocks(desc, io_ch, 15, 2, io_done, NULL);
|
||||
CU_ASSERT(rc == 0);
|
||||
CU_ASSERT(g_io_done == false);
|
||||
|
||||
g_bdev_ut_channel->expected_offset = 16;
|
||||
g_bdev_ut_channel->expected_length = 1;
|
||||
|
||||
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
|
||||
stub_complete_io(1);
|
||||
CU_ASSERT(g_io_done == false);
|
||||
|
||||
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
|
||||
stub_complete_io(1);
|
||||
CU_ASSERT(g_io_done == true);
|
||||
|
||||
/* Test an FLUSH that needs to be split. This is an I/O type that does not have iovecs. */
|
||||
/* Test a FLUSH. This should also not be split. */
|
||||
bdev->optimal_io_boundary = 16;
|
||||
g_io_done = false;
|
||||
g_bdev_ut_channel->expected_iotype = SPDK_BDEV_IO_TYPE_FLUSH;
|
||||
g_bdev_ut_channel->expected_offset = 15;
|
||||
g_bdev_ut_channel->expected_length = 1;
|
||||
g_bdev_ut_channel->expected_length = 2;
|
||||
g_bdev_ut_channel->expected_iovcnt = 0;
|
||||
|
||||
rc = spdk_bdev_flush_blocks(desc, io_ch, 15, 2, io_done, NULL);
|
||||
CU_ASSERT(rc == 0);
|
||||
CU_ASSERT(g_io_done == false);
|
||||
|
||||
g_bdev_ut_channel->expected_offset = 16;
|
||||
g_bdev_ut_channel->expected_length = 1;
|
||||
|
||||
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
|
||||
stub_complete_io(1);
|
||||
CU_ASSERT(g_io_done == false);
|
||||
|
||||
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
|
||||
stub_complete_io(1);
|
||||
CU_ASSERT(g_io_done == true);
|
||||
|
Loading…
Reference in New Issue
Block a user