bdev: only apply split_on_optimal_io_boundary to R/W

Splitting a 1TB unmap into individual 64KB unmap commands
(for a RAID volume with 64KB strip size) would be awful -
the RAID module can be much smarter about this.

So back out the changes for splitting I/O without payload.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I24fe6d911f4e3c9db4b2cb5d66c7236a5596e0d9

Reviewed-on: https://review.gerrithub.io/424103
Chandler-Test-Pool: SPDK Automated Test System <sys_sgsw@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
This commit is contained in:
Jim Harris 2018-08-30 08:06:54 -07:00
parent eb6a2cb8f5
commit 4f860d7e40
3 changed files with 29 additions and 110 deletions

View File

@ -254,8 +254,11 @@ struct spdk_bdev {
/**
* Specifies whether the optimal_io_boundary is mandatory or
* only advisory. If set to true, the bdev layer will split
* I/O that span the optimal_io_boundary before submitting them
* to the bdev module.
* READ and WRITE I/O that span the optimal_io_boundary before
* submitting them to the bdev module.
*
* Note that this field cannot be used to force splitting of
* UNMAP, WRITE_ZEROES or FLUSH I/O.
*/
bool split_on_optimal_io_boundary;

View File

@ -1056,20 +1056,21 @@ _spdk_bdev_io_type_can_split(uint8_t type)
assert(type != SPDK_BDEV_IO_TYPE_INVALID);
assert(type < SPDK_BDEV_NUM_IO_TYPES);
switch (type) {
case SPDK_BDEV_IO_TYPE_RESET:
case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
case SPDK_BDEV_IO_TYPE_NVME_IO:
case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
/* These types of bdev_io do not specify an LBA offset/length. */
return false;
default:
/* Only split READ and WRITE I/O. Theoretically other types of I/O like
* UNMAP could be split, but these types of I/O are typically much larger
* in size (sometimes the size of the entire block device), and the bdev
* module can more efficiently split these types of I/O. Plus those types
* of I/O do not have a payload, which makes the splitting process simpler.
*/
if (type == SPDK_BDEV_IO_TYPE_READ || type == SPDK_BDEV_IO_TYPE_WRITE) {
return true;
} else {
return false;
}
}
static bool
_spdk_bdev_io_spans_boundary(struct spdk_bdev_io *bdev_io)
_spdk_bdev_io_should_split(struct spdk_bdev_io *bdev_io)
{
uint64_t start_stripe, end_stripe;
uint32_t io_boundary = bdev_io->bdev->optimal_io_boundary;
@ -1180,51 +1181,6 @@ _spdk_bdev_io_split_with_payload(void *_bdev_io)
}
}
static void
_spdk_bdev_io_split_no_payload(void *_bdev_io)
{
struct spdk_bdev_io *bdev_io = _bdev_io;
uint64_t current_offset, remaining;
uint32_t to_next_boundary;
int rc;
remaining = bdev_io->u.bdev.split_remaining_num_blocks;
current_offset = bdev_io->u.bdev.split_current_offset_blocks;
to_next_boundary = _to_next_boundary(current_offset, bdev_io->bdev->optimal_io_boundary);
to_next_boundary = spdk_min(remaining, to_next_boundary);
if (bdev_io->type == SPDK_BDEV_IO_TYPE_UNMAP) {
rc = spdk_bdev_unmap_blocks(bdev_io->internal.desc,
spdk_io_channel_from_ctx(bdev_io->internal.ch),
current_offset, to_next_boundary,
_spdk_bdev_io_split_done, bdev_io);
} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE_ZEROES) {
rc = spdk_bdev_write_zeroes_blocks(bdev_io->internal.desc,
spdk_io_channel_from_ctx(bdev_io->internal.ch),
current_offset, to_next_boundary,
_spdk_bdev_io_split_done, bdev_io);
} else {
assert(bdev_io->type == SPDK_BDEV_IO_TYPE_FLUSH);
rc = spdk_bdev_flush_blocks(bdev_io->internal.desc,
spdk_io_channel_from_ctx(bdev_io->internal.ch),
current_offset, to_next_boundary,
_spdk_bdev_io_split_done, bdev_io);
}
if (rc == 0) {
bdev_io->u.bdev.split_current_offset_blocks += to_next_boundary;
bdev_io->u.bdev.split_remaining_num_blocks -= to_next_boundary;
} else {
assert(rc == -ENOMEM);
bdev_io->internal.waitq_entry.bdev = bdev_io->bdev;
bdev_io->internal.waitq_entry.cb_fn = _spdk_bdev_io_split_with_payload;
bdev_io->internal.waitq_entry.cb_arg = bdev_io;
spdk_bdev_queue_io_wait(bdev_io->bdev, spdk_io_channel_from_ctx(bdev_io->internal.ch),
&bdev_io->internal.waitq_entry);
}
}
static void
_spdk_bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
@ -1248,11 +1204,7 @@ _spdk_bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_ar
* Continue with the splitting process. This function will complete the parent I/O if the
* splitting is done.
*/
if (parent_io->type == SPDK_BDEV_IO_TYPE_READ || parent_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
_spdk_bdev_io_split_with_payload(parent_io);
} else {
_spdk_bdev_io_split_no_payload(parent_io);
}
_spdk_bdev_io_split_with_payload(parent_io);
}
static void
@ -1263,11 +1215,7 @@ _spdk_bdev_io_split(struct spdk_bdev_io *bdev_io)
bdev_io->u.bdev.split_current_offset_blocks = bdev_io->u.bdev.offset_blocks;
bdev_io->u.bdev.split_remaining_num_blocks = bdev_io->u.bdev.num_blocks;
if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ || bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
_spdk_bdev_io_split_with_payload(bdev_io);
} else {
_spdk_bdev_io_split_no_payload(bdev_io);
}
_spdk_bdev_io_split_with_payload(bdev_io);
}
static void
@ -1314,7 +1262,7 @@ spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io)
assert(thread != NULL);
assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_PENDING);
if (bdev->split_on_optimal_io_boundary && _spdk_bdev_io_spans_boundary(bdev_io)) {
if (bdev->split_on_optimal_io_boundary && _spdk_bdev_io_should_split(bdev_io)) {
_spdk_bdev_io_split(bdev_io);
return;
}

View File

@ -747,25 +747,25 @@ bdev_io_spans_boundary_test(void)
bdev_io.bdev = &bdev;
/* bdev has no optimal_io_boundary set - so this should return false. */
CU_ASSERT(_spdk_bdev_io_spans_boundary(&bdev_io) == false);
CU_ASSERT(_spdk_bdev_io_should_split(&bdev_io) == false);
bdev.optimal_io_boundary = 32;
bdev_io.type = SPDK_BDEV_IO_TYPE_RESET;
/* RESETs are not based on LBAs - so this should return false. */
CU_ASSERT(_spdk_bdev_io_spans_boundary(&bdev_io) == false);
CU_ASSERT(_spdk_bdev_io_should_split(&bdev_io) == false);
bdev_io.type = SPDK_BDEV_IO_TYPE_READ;
bdev_io.u.bdev.offset_blocks = 0;
bdev_io.u.bdev.num_blocks = 32;
/* This I/O run right up to, but does not cross, the boundary - so this should return false. */
CU_ASSERT(_spdk_bdev_io_spans_boundary(&bdev_io) == false);
CU_ASSERT(_spdk_bdev_io_should_split(&bdev_io) == false);
bdev_io.u.bdev.num_blocks = 33;
/* This I/O spans a boundary. */
CU_ASSERT(_spdk_bdev_io_spans_boundary(&bdev_io) == true);
CU_ASSERT(_spdk_bdev_io_should_split(&bdev_io) == true);
}
static void
@ -907,81 +907,49 @@ bdev_io_split(void)
stub_complete_io(1);
CU_ASSERT(g_io_done == true);
/* Test a WRITE_ZEROES that needs to be split. This is an I/O type that does not have iovecs.
* Have this I/O end right on a boundary. Use a non-standard optimal_io_boundary to test the
* non-power-of-2 path.
/* Test a WRITE_ZEROES that would span an I/O boundary. WRITE_ZEROES should not be
* split, so test that.
*/
bdev->optimal_io_boundary = 15;
g_io_done = false;
g_bdev_ut_channel->expected_iotype = SPDK_BDEV_IO_TYPE_WRITE_ZEROES;
g_bdev_ut_channel->expected_offset = 9;
g_bdev_ut_channel->expected_length = 6;
g_bdev_ut_channel->expected_length = 36;
g_bdev_ut_channel->expected_iovcnt = 0;
rc = spdk_bdev_write_zeroes_blocks(desc, io_ch, 9, 36, io_done, NULL);
CU_ASSERT(rc == 0);
CU_ASSERT(g_io_done == false);
g_bdev_ut_channel->expected_offset = 15;
g_bdev_ut_channel->expected_length = 15;
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
stub_complete_io(1);
CU_ASSERT(g_io_done == false);
g_bdev_ut_channel->expected_offset = 30;
g_bdev_ut_channel->expected_length = 15;
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
stub_complete_io(1);
CU_ASSERT(g_io_done == false);
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
stub_complete_io(1);
CU_ASSERT(g_io_done == true);
/* Test an UNMAP that needs to be split. This is an I/O type that does not have iovecs. */
/* Test an UNMAP. This should also not be split. */
bdev->optimal_io_boundary = 16;
g_io_done = false;
g_bdev_ut_channel->expected_iotype = SPDK_BDEV_IO_TYPE_UNMAP;
g_bdev_ut_channel->expected_offset = 15;
g_bdev_ut_channel->expected_length = 1;
g_bdev_ut_channel->expected_length = 2;
g_bdev_ut_channel->expected_iovcnt = 0;
rc = spdk_bdev_unmap_blocks(desc, io_ch, 15, 2, io_done, NULL);
CU_ASSERT(rc == 0);
CU_ASSERT(g_io_done == false);
g_bdev_ut_channel->expected_offset = 16;
g_bdev_ut_channel->expected_length = 1;
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
stub_complete_io(1);
CU_ASSERT(g_io_done == false);
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
stub_complete_io(1);
CU_ASSERT(g_io_done == true);
/* Test an FLUSH that needs to be split. This is an I/O type that does not have iovecs. */
/* Test a FLUSH. This should also not be split. */
bdev->optimal_io_boundary = 16;
g_io_done = false;
g_bdev_ut_channel->expected_iotype = SPDK_BDEV_IO_TYPE_FLUSH;
g_bdev_ut_channel->expected_offset = 15;
g_bdev_ut_channel->expected_length = 1;
g_bdev_ut_channel->expected_length = 2;
g_bdev_ut_channel->expected_iovcnt = 0;
rc = spdk_bdev_flush_blocks(desc, io_ch, 15, 2, io_done, NULL);
CU_ASSERT(rc == 0);
CU_ASSERT(g_io_done == false);
g_bdev_ut_channel->expected_offset = 16;
g_bdev_ut_channel->expected_length = 1;
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
stub_complete_io(1);
CU_ASSERT(g_io_done == false);
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
stub_complete_io(1);
CU_ASSERT(g_io_done == true);