From 0d1aa0252d7f50682472b2cc38e280603468863d Mon Sep 17 00:00:00 2001 From: Tomasz Zawadzki Date: Fri, 22 Nov 2019 11:47:14 -0500 Subject: [PATCH] blob: fix sequentially allocated clusters starting from 0 When serializing extents, run-length encoding is supposed to 1) RLE all sequential LBAs 2) RLE zero LBAs (unallocated) There is one special case, with sequential LBAs that start with 0 LBA. This is RLE as 1) case, but results in descriptor matching case 2). Which causes loss of allocated clusters. This requires following conditions to be met: - blobstore has just a single cluster reserved for MD - blob is thin provisioned - first allocation occurs on cluster_num=1 For last part to be true, very first write for blob has to be issued to LBA between cluster_size and 2*cluster_size. Causing allocation of second cluster in blobstore and assiging it LBA equal to number of LBAs per cluster. To fix this, case 1) disallows to RLE zeroes. Signed-off-by: Tomasz Zawadzki Change-Id: I136282407966310c882ca97c960e9a71c442c469 Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/475494 Reviewed-by: Ben Walker Reviewed-by: Jim Harris Tested-by: SPDK CI Jenkins --- lib/blob/blobstore.c | 4 +- test/unit/lib/blob/blob.c/blob_ut.c | 134 ++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+), 1 deletion(-) diff --git a/lib/blob/blobstore.c b/lib/blob/blobstore.c index d4160d6244..abacd9429a 100644 --- a/lib/blob/blobstore.c +++ b/lib/blob/blobstore.c @@ -698,10 +698,12 @@ _spdk_blob_serialize_extent_rle(const struct spdk_blob *blob, lba_count = lba_per_cluster; extent_idx = 0; for (i = start_cluster + 1; i < blob->active.num_clusters; i++) { - if ((lba + lba_count) == blob->active.clusters[i]) { + if ((lba + lba_count) == blob->active.clusters[i] && lba != 0) { + /* Run-length encode sequential non-zero LBA */ lba_count += lba_per_cluster; continue; } else if (lba == 0 && blob->active.clusters[i] == 0) { + /* Run-length encode unallocated clusters */ lba_count += lba_per_cluster; continue; } diff --git a/test/unit/lib/blob/blob.c/blob_ut.c b/test/unit/lib/blob/blob.c/blob_ut.c index 65ae151f9f..df2c4a132e 100644 --- a/test/unit/lib/blob/blob.c/blob_ut.c +++ b/test/unit/lib/blob/blob.c/blob_ut.c @@ -4688,6 +4688,139 @@ blob_thin_prov_rw(void) g_blobid = 0; } +static void +blob_thin_prov_rle(void) +{ + static const uint8_t zero[10 * 4096] = { 0 }; + struct spdk_blob_store *bs; + struct spdk_bs_dev *dev; + struct spdk_blob *blob; + struct spdk_io_channel *channel; + struct spdk_blob_opts opts; + spdk_blob_id blobid; + uint64_t free_clusters; + uint64_t page_size; + uint8_t payload_read[10 * 4096]; + uint8_t payload_write[10 * 4096]; + uint64_t write_bytes; + uint64_t read_bytes; + uint64_t io_unit; + + dev = init_dev(); + + spdk_bs_init(dev, NULL, bs_op_with_handle_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + bs = g_bs; + free_clusters = spdk_bs_free_cluster_count(bs); + page_size = spdk_bs_get_page_size(bs); + + spdk_blob_opts_init(&opts); + opts.thin_provision = true; + opts.num_clusters = 5; + + spdk_bs_create_blob_ext(bs, &opts, blob_op_with_id_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); + CU_ASSERT(free_clusters == spdk_bs_free_cluster_count(bs)); + blobid = g_blobid; + + spdk_bs_open_blob(bs, blobid, blob_op_with_handle_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_blob != NULL); + blob = g_blob; + + channel = spdk_bs_alloc_io_channel(bs); + CU_ASSERT(channel != NULL); + + /* Target specifically second cluster in a blob as first allocation */ + io_unit = _spdk_bs_cluster_to_page(bs, 1) * _spdk_bs_io_unit_per_page(bs); + + /* Payload should be all zeros from unallocated clusters */ + memset(payload_read, 0xFF, sizeof(payload_read)); + spdk_blob_io_read(blob, channel, payload_read, io_unit, 10, blob_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(memcmp(zero, payload_read, 10 * 4096) == 0); + + write_bytes = g_dev_write_bytes; + read_bytes = g_dev_read_bytes; + + /* Issue write to second cluster in a blob */ + memset(payload_write, 0xE5, sizeof(payload_write)); + spdk_blob_io_write(blob, channel, payload_write, io_unit, 10, blob_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(free_clusters - 1 == spdk_bs_free_cluster_count(bs)); + /* For thin-provisioned blob we need to write 10 pages plus one page metadata and + * read 0 bytes */ + CU_ASSERT(g_dev_write_bytes - write_bytes == page_size * 11); + CU_ASSERT(g_dev_read_bytes - read_bytes == 0); + + spdk_blob_io_read(blob, channel, payload_read, io_unit, 10, blob_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(memcmp(payload_write, payload_read, 10 * 4096) == 0); + + spdk_bs_free_io_channel(channel); + poll_threads(); + + spdk_blob_close(blob, blob_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + + /* Unload the blob store */ + spdk_bs_unload(g_bs, bs_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; + g_blob = NULL; + g_blobid = 0; + + /* Load an existing blob store */ + dev = init_dev(); + spdk_bs_load(dev, NULL, bs_op_with_handle_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + + bs = g_bs; + + spdk_bs_open_blob(g_bs, blobid, blob_op_with_handle_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_blob != NULL); + blob = g_blob; + + channel = spdk_bs_alloc_io_channel(bs); + CU_ASSERT(channel != NULL); + + /* Read second cluster after blob reload to confirm data written */ + spdk_blob_io_read(blob, channel, payload_read, io_unit, 10, blob_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(memcmp(payload_write, payload_read, 10 * 4096) == 0); + + spdk_bs_free_io_channel(channel); + poll_threads(); + + spdk_blob_close(blob, blob_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + + spdk_bs_delete_blob(bs, blobid, blob_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + + spdk_bs_unload(g_bs, bs_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; +} + static void blob_thin_prov_rw_iov(void) { @@ -7447,6 +7580,7 @@ int main(int argc, char **argv) CU_add_test(suite, "blob_thin_prov_alloc", blob_thin_prov_alloc) == NULL || CU_add_test(suite, "blob_insert_cluster_msg", blob_insert_cluster_msg) == NULL || CU_add_test(suite, "blob_thin_prov_rw", blob_thin_prov_rw) == NULL || + CU_add_test(suite, "blob_thin_prov_rle", blob_thin_prov_rle) == NULL || CU_add_test(suite, "blob_thin_prov_rw_iov", blob_thin_prov_rw_iov) == NULL || CU_add_test(suite, "bs_load_iter", bs_load_iter) == NULL || CU_add_test(suite, "blob_snapshot_rw", blob_snapshot_rw) == NULL ||