bdev: add fallback from write_zeroes to writev
If write_zeroes is not supported by the block device, we can get the same behavior by simply writing a buffer full of zeroes to the blocks we want to erase. I also incorporate splitting into the bdev layer to accommodate large I/O. Change-Id: I8fa1bfaaf22d7bfc6e3afb6e89d22fa9f7767e55 Signed-off-by: Seth Howell <seth.howell@intel.com> Reviewed-on: https://review.gerrithub.io/373829 Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com> Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
This commit is contained in:
parent
5f5edbcb8c
commit
3fd7f28dc9
@ -271,6 +271,12 @@ struct spdk_bdev_io {
|
||||
/** Status for the IO */
|
||||
int16_t status;
|
||||
|
||||
/** number of blocks remaining in a split i/o */
|
||||
uint64_t split_remaining_num_blocks;
|
||||
|
||||
/** current offset of the split I/O in the bdev */
|
||||
uint64_t split_current_offset_blocks;
|
||||
|
||||
/**
|
||||
* Set to true while the bdev module submit_request function is in progress.
|
||||
*
|
||||
@ -337,6 +343,9 @@ struct spdk_bdev_io {
|
||||
/** User function that will be called when this completes */
|
||||
spdk_bdev_io_completion_cb cb;
|
||||
|
||||
/** stored user callback in case we split the I/O and use a temporary callback */
|
||||
spdk_bdev_io_completion_cb stored_user_cb;
|
||||
|
||||
/** Context that will be passed to the completion callback */
|
||||
void *caller_ctx;
|
||||
|
||||
|
@ -58,6 +58,7 @@ int __itt_init_ittlib(const char *, __itt_group_id);
|
||||
#define BUF_SMALL_POOL_SIZE 8192
|
||||
#define BUF_LARGE_POOL_SIZE 1024
|
||||
#define NOMEM_THRESHOLD_COUNT 8
|
||||
#define ZERO_BUFFER_SIZE 0x100000
|
||||
|
||||
typedef TAILQ_HEAD(, spdk_bdev_io) bdev_io_tailq_t;
|
||||
|
||||
@ -67,6 +68,8 @@ struct spdk_bdev_mgr {
|
||||
struct spdk_mempool *buf_small_pool;
|
||||
struct spdk_mempool *buf_large_pool;
|
||||
|
||||
void *zero_buffer;
|
||||
|
||||
TAILQ_HEAD(, spdk_bdev_module_if) bdev_modules;
|
||||
|
||||
TAILQ_HEAD(, spdk_bdev) bdevs;
|
||||
@ -150,6 +153,8 @@ struct spdk_bdev_channel {
|
||||
|
||||
};
|
||||
|
||||
static void spdk_bdev_write_zeroes_split(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
|
||||
|
||||
struct spdk_bdev *
|
||||
spdk_bdev_first(void)
|
||||
{
|
||||
@ -527,6 +532,14 @@ spdk_bdev_initialize(spdk_bdev_init_cb cb_fn, void *cb_arg,
|
||||
return;
|
||||
}
|
||||
|
||||
g_bdev_mgr.zero_buffer = spdk_dma_zmalloc(ZERO_BUFFER_SIZE, ZERO_BUFFER_SIZE,
|
||||
NULL);
|
||||
if (!g_bdev_mgr.zero_buffer) {
|
||||
SPDK_ERRLOG("create bdev zero buffer failed\n");
|
||||
spdk_bdev_init_complete(-1);
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef SPDK_CONFIG_VTUNE
|
||||
g_bdev_mgr.domain = __itt_domain_create("spdk_bdev");
|
||||
#endif
|
||||
@ -579,6 +592,7 @@ spdk_bdev_finish(void)
|
||||
spdk_mempool_free(g_bdev_mgr.bdev_io_pool);
|
||||
spdk_mempool_free(g_bdev_mgr.buf_small_pool);
|
||||
spdk_mempool_free(g_bdev_mgr.buf_large_pool);
|
||||
spdk_dma_free(g_bdev_mgr.zero_buffer);
|
||||
|
||||
spdk_io_device_unregister(&g_bdev_mgr, NULL);
|
||||
}
|
||||
@ -1088,26 +1102,60 @@ spdk_bdev_write_zeroes_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channe
|
||||
struct spdk_bdev *bdev = desc->bdev;
|
||||
struct spdk_bdev_io *bdev_io;
|
||||
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
||||
uint64_t len;
|
||||
bool split_request = false;
|
||||
|
||||
if (num_blocks > UINT64_MAX / spdk_bdev_get_block_size(bdev)) {
|
||||
SPDK_ERRLOG("length argument out of range in write_zeroes\n");
|
||||
return -ERANGE;
|
||||
}
|
||||
|
||||
if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
bdev_io = spdk_bdev_get_io();
|
||||
|
||||
if (!bdev_io) {
|
||||
SPDK_ERRLOG("bdev_io memory allocation failed duing write_zeroes\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
bdev_io->ch = channel;
|
||||
bdev_io->u.bdev.iovs = NULL;
|
||||
bdev_io->u.bdev.iovcnt = 0;
|
||||
bdev_io->u.bdev.num_blocks = num_blocks;
|
||||
bdev_io->u.bdev.offset_blocks = offset_blocks;
|
||||
bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE_ZEROES;
|
||||
|
||||
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
||||
if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
|
||||
bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE_ZEROES;
|
||||
bdev_io->u.bdev.num_blocks = num_blocks;
|
||||
bdev_io->u.bdev.iovs = NULL;
|
||||
bdev_io->u.bdev.iovcnt = 0;
|
||||
|
||||
} else {
|
||||
assert(spdk_bdev_get_block_size(bdev) <= ZERO_BUFFER_SIZE);
|
||||
|
||||
len = spdk_bdev_get_block_size(bdev) * num_blocks;
|
||||
|
||||
if (len > ZERO_BUFFER_SIZE) {
|
||||
split_request = true;
|
||||
len = ZERO_BUFFER_SIZE;
|
||||
}
|
||||
|
||||
bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
|
||||
bdev_io->u.bdev.iov.iov_base = g_bdev_mgr.zero_buffer;
|
||||
bdev_io->u.bdev.iov.iov_len = len;
|
||||
bdev_io->u.bdev.iovs = &bdev_io->u.bdev.iov;
|
||||
bdev_io->u.bdev.iovcnt = 1;
|
||||
bdev_io->u.bdev.num_blocks = len / spdk_bdev_get_block_size(bdev);
|
||||
bdev_io->split_remaining_num_blocks = num_blocks - bdev_io->u.bdev.num_blocks;
|
||||
bdev_io->split_current_offset_blocks = offset_blocks + bdev_io->u.bdev.num_blocks;
|
||||
}
|
||||
|
||||
if (split_request) {
|
||||
bdev_io->stored_user_cb = cb;
|
||||
spdk_bdev_io_init(bdev_io, bdev, cb_arg, spdk_bdev_write_zeroes_split);
|
||||
} else {
|
||||
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
||||
}
|
||||
spdk_bdev_io_submit(bdev_io);
|
||||
return 0;
|
||||
}
|
||||
@ -1948,6 +1996,35 @@ spdk_bdev_part_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_
|
||||
spdk_bdev_free_io(bdev_io);
|
||||
}
|
||||
|
||||
static void
|
||||
spdk_bdev_write_zeroes_split(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
|
||||
{
|
||||
uint64_t len;
|
||||
|
||||
if (!success) {
|
||||
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
|
||||
return;
|
||||
}
|
||||
|
||||
/* no need to perform the error checking from write_zeroes_blocks because this request already passed those checks. */
|
||||
len = spdk_min(spdk_bdev_get_block_size(bdev_io->bdev) * bdev_io->split_remaining_num_blocks,
|
||||
ZERO_BUFFER_SIZE);
|
||||
|
||||
bdev_io->u.bdev.offset_blocks = bdev_io->split_current_offset_blocks;
|
||||
bdev_io->u.bdev.iov.iov_len = len;
|
||||
bdev_io->u.bdev.num_blocks = len / spdk_bdev_get_block_size(bdev_io->bdev);
|
||||
bdev_io->split_remaining_num_blocks -= bdev_io->u.bdev.num_blocks;
|
||||
bdev_io->split_current_offset_blocks += bdev_io->u.bdev.num_blocks;
|
||||
|
||||
/* if this round completes the i/o, change the callback to be the original user callback */
|
||||
if (bdev_io->split_remaining_num_blocks == 0) {
|
||||
spdk_bdev_io_init(bdev_io, bdev_io->bdev, cb_arg, bdev_io->stored_user_cb);
|
||||
} else {
|
||||
spdk_bdev_io_init(bdev_io, bdev_io->bdev, cb_arg, spdk_bdev_write_zeroes_split);
|
||||
}
|
||||
spdk_bdev_io_submit(bdev_io);
|
||||
}
|
||||
|
||||
void
|
||||
spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io)
|
||||
{
|
||||
|
@ -209,6 +209,21 @@ __blockdev_write(void *arg1, void *arg2)
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
__blockdev_write_zeroes(void *arg1, void *arg2)
|
||||
{
|
||||
struct bdevio_request *req = arg1;
|
||||
struct io_target *target = req->target;
|
||||
int rc;
|
||||
|
||||
rc = spdk_bdev_write_zeroes(target->bdev_desc, target->ch, req->offset,
|
||||
req->data_len, quick_test_complete, NULL);
|
||||
if (rc) {
|
||||
g_completion_success = false;
|
||||
wake_ut_thread();
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
sgl_chop_buffer(struct bdevio_request *req, int iov_len)
|
||||
{
|
||||
@ -250,6 +265,22 @@ blockdev_write(struct io_target *target, char *tx_buf,
|
||||
execute_spdk_function(__blockdev_write, &req, NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
blockdev_write_zeroes(struct io_target *target, char *tx_buf,
|
||||
uint64_t offset, int data_len)
|
||||
{
|
||||
struct bdevio_request req;
|
||||
|
||||
req.target = target;
|
||||
req.buf = tx_buf;
|
||||
req.data_len = data_len;
|
||||
req.offset = offset;
|
||||
|
||||
g_completion_success = false;
|
||||
|
||||
execute_spdk_function(__blockdev_write_zeroes, &req, NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
__blockdev_read(void *arg1, void *arg2)
|
||||
{
|
||||
@ -303,7 +334,7 @@ blockdev_write_read_data_match(char *rx_buf, char *tx_buf, int data_length)
|
||||
|
||||
static void
|
||||
blockdev_write_read(uint32_t data_length, uint32_t iov_len, int pattern, uint64_t offset,
|
||||
int expected_rc)
|
||||
int expected_rc, bool write_zeroes)
|
||||
{
|
||||
struct io_target *target;
|
||||
char *tx_buf = NULL;
|
||||
@ -312,22 +343,30 @@ blockdev_write_read(uint32_t data_length, uint32_t iov_len, int pattern, uint64_
|
||||
|
||||
target = g_io_targets;
|
||||
while (target != NULL) {
|
||||
if (data_length < spdk_bdev_get_block_size(target->bdev)) {
|
||||
if (data_length < spdk_bdev_get_block_size(target->bdev) ||
|
||||
data_length / spdk_bdev_get_block_size(target->bdev) > spdk_bdev_get_num_blocks(target->bdev)) {
|
||||
target = target->next;
|
||||
continue;
|
||||
}
|
||||
|
||||
initialize_buffer(&tx_buf, pattern, data_length);
|
||||
initialize_buffer(&rx_buf, 0, data_length);
|
||||
if (!write_zeroes) {
|
||||
initialize_buffer(&tx_buf, pattern, data_length);
|
||||
initialize_buffer(&rx_buf, 0, data_length);
|
||||
|
||||
blockdev_write(target, tx_buf, offset, data_length, iov_len);
|
||||
} else {
|
||||
initialize_buffer(&tx_buf, 0, data_length);
|
||||
initialize_buffer(&rx_buf, pattern, data_length);
|
||||
|
||||
blockdev_write_zeroes(target, tx_buf, offset, data_length);
|
||||
}
|
||||
|
||||
blockdev_write(target, tx_buf, offset, data_length, iov_len);
|
||||
|
||||
if (expected_rc == 0) {
|
||||
CU_ASSERT_EQUAL(g_completion_success, true);
|
||||
} else {
|
||||
CU_ASSERT_EQUAL(g_completion_success, false);
|
||||
}
|
||||
|
||||
blockdev_read(target, rx_buf, offset, data_length, iov_len);
|
||||
|
||||
if (expected_rc == 0) {
|
||||
@ -364,7 +403,96 @@ blockdev_write_read_4k(void)
|
||||
* of write and read for all blockdevs is 0. */
|
||||
expected_rc = 0;
|
||||
|
||||
blockdev_write_read(data_length, 0, pattern, offset, expected_rc);
|
||||
blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0);
|
||||
}
|
||||
|
||||
/*
 * write_zeroes followed by a read of 4K at offset 0.
 */
static void
blockdev_write_zeroes_read_4k(void)
{
	/* Data size = 4K */
	const uint32_t data_length = 4096;
	const uint64_t offset = 0;
	const int pattern = 0xA3;
	/* Params are valid, hence the expected return value
	 * of write_zeroes and read for all blockdevs is 0. */
	const int expected_rc = 0;

	/* iov_len of 0, write_zeroes variant selected by the last argument. */
	blockdev_write_read(data_length, 0, pattern, offset, expected_rc, true);
}
|
||||
|
||||
/*
|
||||
* This i/o will not have to split at the bdev layer.
|
||||
*/
|
||||
/*
 * write_zeroes followed by a read of 1M at offset 0.
 */
static void
blockdev_write_zeroes_read_1m(void)
{
	/* Data size = 1M */
	const uint32_t data_length = 1048576;
	const uint64_t offset = 0;
	const int pattern = 0xA3;
	/* Params are valid, hence the expected return value
	 * of write_zeroes and read for all blockdevs is 0. */
	const int expected_rc = 0;

	/* iov_len of 0, write_zeroes variant selected by the last argument. */
	blockdev_write_read(data_length, 0, pattern, offset, expected_rc, true);
}
|
||||
|
||||
/*
|
||||
* This i/o will have to split at the bdev layer if
|
||||
* write-zeroes is not supported by the bdev.
|
||||
*/
|
||||
/*
 * write_zeroes followed by a read of 3M at offset 0.
 */
static void
blockdev_write_zeroes_read_3m(void)
{
	/* Data size = 3M */
	const uint32_t data_length = 3145728;
	const uint64_t offset = 0;
	const int pattern = 0xA3;
	/* Params are valid, hence the expected return value
	 * of write_zeroes and read for all blockdevs is 0. */
	const int expected_rc = 0;

	/* iov_len of 0, write_zeroes variant selected by the last argument. */
	blockdev_write_read(data_length, 0, pattern, offset, expected_rc, true);
}
|
||||
|
||||
/*
|
||||
* This i/o will have to split at the bdev layer if
|
||||
* write-zeroes is not supported by the bdev. It also
|
||||
* tests a write size that is not an even multiple of
|
||||
* the bdev layer zero buffer size.
|
||||
*/
|
||||
/*
 * write_zeroes followed by a read of 3.5M at offset 0.
 */
static void
blockdev_write_zeroes_read_3m_500k(void)
{
	/* Data size = 3.5M */
	const uint32_t data_length = 3670016;
	const uint64_t offset = 0;
	const int pattern = 0xA3;
	/* Params are valid, hence the expected return value
	 * of write_zeroes and read for all blockdevs is 0. */
	const int expected_rc = 0;

	/* iov_len of 0, write_zeroes variant selected by the last argument. */
	blockdev_write_read(data_length, 0, pattern, offset, expected_rc, true);
}
|
||||
|
||||
static void
|
||||
@ -385,7 +513,7 @@ blockdev_writev_readv_4k(void)
|
||||
* of write and read for all blockdevs is 0. */
|
||||
expected_rc = 0;
|
||||
|
||||
blockdev_write_read(data_length, iov_len, pattern, offset, expected_rc);
|
||||
blockdev_write_read(data_length, iov_len, pattern, offset, expected_rc, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -406,7 +534,7 @@ blockdev_writev_readv_30x4k(void)
|
||||
* of write and read for all blockdevs is 0. */
|
||||
expected_rc = 0;
|
||||
|
||||
blockdev_write_read(data_length, iov_len, pattern, offset, expected_rc);
|
||||
blockdev_write_read(data_length, iov_len, pattern, offset, expected_rc, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -426,7 +554,7 @@ blockdev_write_read_512Bytes(void)
|
||||
* of write and read for all blockdevs is 0. */
|
||||
expected_rc = 0;
|
||||
|
||||
blockdev_write_read(data_length, 0, pattern, offset, expected_rc);
|
||||
blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -447,7 +575,7 @@ blockdev_writev_readv_512Bytes(void)
|
||||
* of write and read for all blockdevs is 0. */
|
||||
expected_rc = 0;
|
||||
|
||||
blockdev_write_read(data_length, iov_len, pattern, offset, expected_rc);
|
||||
blockdev_write_read(data_length, iov_len, pattern, offset, expected_rc, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -467,7 +595,7 @@ blockdev_write_read_size_gt_128k(void)
|
||||
* of write and read for all blockdevs is 0. */
|
||||
expected_rc = 0;
|
||||
|
||||
blockdev_write_read(data_length, 0, pattern, offset, expected_rc);
|
||||
blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -488,7 +616,7 @@ blockdev_writev_readv_size_gt_128k(void)
|
||||
* of write and read for all blockdevs is 0. */
|
||||
expected_rc = 0;
|
||||
|
||||
blockdev_write_read(data_length, iov_len, pattern, offset, expected_rc);
|
||||
blockdev_write_read(data_length, iov_len, pattern, offset, expected_rc, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -509,7 +637,7 @@ blockdev_writev_readv_size_gt_128k_two_iov(void)
|
||||
* of write and read for all blockdevs is 0. */
|
||||
expected_rc = 0;
|
||||
|
||||
blockdev_write_read(data_length, iov_len, pattern, offset, expected_rc);
|
||||
blockdev_write_read(data_length, iov_len, pattern, offset, expected_rc, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -529,7 +657,7 @@ blockdev_write_read_invalid_size(void)
|
||||
* of write and read for all blockdevs is < 0 */
|
||||
expected_rc = -1;
|
||||
|
||||
blockdev_write_read(data_length, 0, pattern, offset, expected_rc);
|
||||
blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -628,7 +756,7 @@ blockdev_write_read_max_offset(void)
|
||||
* of write and read for all blockdevs is < 0 */
|
||||
expected_rc = -1;
|
||||
|
||||
blockdev_write_read(data_length, 0, pattern, offset, expected_rc);
|
||||
blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -649,7 +777,7 @@ blockdev_overlapped_write_read_8k(void)
|
||||
expected_rc = 0;
|
||||
/* Assert the write by comparing it with values read
|
||||
* from the same offset for each blockdev */
|
||||
blockdev_write_read(data_length, 0, pattern, offset, expected_rc);
|
||||
blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0);
|
||||
|
||||
/* Overwrite the pattern 0xbb of size 8K on an address offset overlapping
|
||||
* with the address written above and assert the new value in
|
||||
@ -660,7 +788,7 @@ blockdev_overlapped_write_read_8k(void)
|
||||
offset = 4096;
|
||||
/* Assert the write by comparing it with values read
|
||||
* from the overlapped offset for each blockdev */
|
||||
blockdev_write_read(data_length, 0, pattern, offset, expected_rc);
|
||||
blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -741,6 +869,10 @@ __run_ut_thread(void *arg1, void *arg2)
|
||||
|
||||
if (
|
||||
CU_add_test(suite, "blockdev write read 4k", blockdev_write_read_4k) == NULL
|
||||
|| CU_add_test(suite, "blockdev write zeroes read 4k", blockdev_write_zeroes_read_4k) == NULL
|
||||
|| CU_add_test(suite, "blockdev write zeroes read 1m", blockdev_write_zeroes_read_1m) == NULL
|
||||
|| CU_add_test(suite, "blockdev write zeroes read 3m", blockdev_write_zeroes_read_3m) == NULL
|
||||
|| CU_add_test(suite, "blockdev write zeroes read 3.5m", blockdev_write_zeroes_read_3m_500k) == NULL
|
||||
|| CU_add_test(suite, "blockdev write read 512 bytes",
|
||||
blockdev_write_read_512Bytes) == NULL
|
||||
|| CU_add_test(suite, "blockdev write read size > 128k",
|
||||
|
Loading…
Reference in New Issue
Block a user