blob: add write_zeroes support

Unmap does not guarantee that erased blocks will return all zeroes.
using write_zeroes when unmapping metadata gives the
desired behavior for a blob.

Only metadata pages will be cleared with write_zeroes in this patch;
blob data clusters will still call unmap.  This behavior may be made
configurable in a later patch (to allow the user to request zeroing of
clusters rather than just unmapping).

Change-Id: I1b210abac110867ce703bcfdeb634eb45aa9d5c9
Signed-off-by: Seth Howell <seth.howell@intel.com>
Reviewed-on: https://review.gerrithub.io/372004
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
This commit is contained in:
Seth Howell 2017-07-27 10:58:51 -07:00 committed by Daniel Verkamp
parent 3fd7f28dc9
commit 3f9cbe513b
6 changed files with 83 additions and 13 deletions

View File

@ -132,6 +132,10 @@ struct spdk_bs_dev {
void (*flush)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
struct spdk_bs_dev_cb_args *cb_args);
void (*write_zeroes)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
uint64_t lba, uint32_t lba_count,
struct spdk_bs_dev_cb_args *cb_args);
void (*unmap)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
uint64_t lba, uint32_t lba_count,
struct spdk_bs_dev_cb_args *cb_args);

View File

@ -123,6 +123,19 @@ bdev_blob_writev(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
}
}
static void
bdev_blob_write_zeroes(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, uint64_t lba,
uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args)
{
int rc;
rc = spdk_bdev_write_zeroes_blocks(__get_desc(dev), channel, lba,
lba_count, bdev_blob_io_complete, cb_args);
if (rc) {
cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
}
}
static void
bdev_blob_unmap(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, uint64_t lba,
uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args)
@ -213,6 +226,7 @@ spdk_bdev_create_bs_dev(struct spdk_bdev *bdev, spdk_bdev_remove_cb_t remove_cb,
b->bs_dev.write = bdev_blob_write;
b->bs_dev.readv = bdev_blob_readv;
b->bs_dev.writev = bdev_blob_writev;
b->bs_dev.write_zeroes = bdev_blob_write_zeroes;
b->bs_dev.unmap = bdev_blob_unmap;
return &b->bs_dev;

View File

@ -760,7 +760,7 @@ _spdk_blob_persist_unmap_clusters(spdk_bs_sequence_t *seq, void *cb_arg, int bse
}
static void
_spdk_blob_persist_unmap_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
_spdk_blob_persist_zero_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
{
struct spdk_blob_persist_ctx *ctx = cb_arg;
struct spdk_blob *blob = ctx->blob;
@ -769,7 +769,7 @@ _spdk_blob_persist_unmap_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bs
/* This loop starts at 1 because the first page is special and handled
* below. The pages (except the first) are never written in place,
* so any pages in the clean list must be unmapped.
* so any pages in the clean list must be zeroed.
*/
for (i = 1; i < blob->clean.num_pages; i++) {
spdk_bit_array_clear(bs->used_md_pages, blob->clean.pages[i]);
@ -787,7 +787,7 @@ _spdk_blob_persist_unmap_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bs
}
static void
_spdk_blob_persist_unmap_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
_spdk_blob_persist_zero_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
{
struct spdk_blob_persist_ctx *ctx = cb_arg;
struct spdk_blob *blob = ctx->blob;
@ -797,21 +797,21 @@ _spdk_blob_persist_unmap_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrn
spdk_bs_batch_t *batch;
size_t i;
batch = spdk_bs_sequence_to_batch(seq, _spdk_blob_persist_unmap_pages_cpl, ctx);
batch = spdk_bs_sequence_to_batch(seq, _spdk_blob_persist_zero_pages_cpl, ctx);
lba_count = _spdk_bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE);
/* This loop starts at 1 because the first page is special and handled
* below. The pages (except the first) are never written in place,
* so any pages in the clean list must be unmapped.
* so any pages in the clean list must be zeroed.
*/
for (i = 1; i < blob->clean.num_pages; i++) {
lba = _spdk_bs_page_to_lba(bs, bs->md_start + blob->clean.pages[i]);
spdk_bs_batch_unmap(batch, lba, lba_count);
spdk_bs_batch_write_zeroes(batch, lba, lba_count);
}
/* The first page will only be unmapped if this is a delete. */
/* The first page will only be zeroed if this is a delete. */
if (blob->active.num_pages == 0) {
uint32_t page_num;
@ -819,7 +819,7 @@ _spdk_blob_persist_unmap_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrn
page_num = _spdk_bs_blobid_to_page(blob->id);
lba = _spdk_bs_page_to_lba(bs, bs->md_start + page_num);
spdk_bs_batch_unmap(batch, lba, lba_count);
spdk_bs_batch_write_zeroes(batch, lba, lba_count);
}
spdk_bs_batch_close(batch);
@ -837,7 +837,7 @@ _spdk_blob_persist_write_page_root(spdk_bs_sequence_t *seq, void *cb_arg, int bs
if (blob->active.num_pages == 0) {
/* Move on to the next step */
_spdk_blob_persist_unmap_pages(seq, ctx, 0);
_spdk_blob_persist_zero_pages(seq, ctx, 0);
return;
}
@ -848,7 +848,7 @@ _spdk_blob_persist_write_page_root(spdk_bs_sequence_t *seq, void *cb_arg, int bs
lba = _spdk_bs_page_to_lba(bs, bs->md_start + _spdk_bs_blobid_to_page(blob->id));
spdk_bs_sequence_write(seq, page, lba, lba_count,
_spdk_blob_persist_unmap_pages, ctx);
_spdk_blob_persist_zero_pages, ctx);
}
static void
@ -994,7 +994,7 @@ _spdk_blob_persist(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
* Immediately jump to the clean up routine. */
assert(blob->clean.num_pages > 0);
ctx->idx = blob->clean.num_pages - 1;
_spdk_blob_persist_unmap_pages(seq, ctx, 0);
_spdk_blob_persist_zero_pages(seq, ctx, 0);
return;
}
@ -2177,8 +2177,8 @@ spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
return;
}
/* TRIM the entire device */
spdk_bs_sequence_unmap(seq, 0, bs->dev->blockcnt, _spdk_bs_init_trim_cpl, ctx);
/* Zero the entire device */
spdk_bs_sequence_write_zeroes(seq, 0, bs->dev->blockcnt, _spdk_bs_init_trim_cpl, ctx);
}
/* END spdk_bs_init */

View File

@ -226,6 +226,23 @@ spdk_bs_sequence_unmap(spdk_bs_sequence_t *seq,
&set->cb_args);
}
void
spdk_bs_sequence_write_zeroes(spdk_bs_sequence_t *seq,
uint64_t lba, uint32_t lba_count,
spdk_bs_sequence_cpl cb_fn, void *cb_arg)
{
struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)seq;
struct spdk_bs_channel *channel = set->channel;
SPDK_DEBUGLOG(SPDK_TRACE_BLOB_RW, "writing zeroes to %u blocks at LBA %lu\n", lba_count, lba);
set->u.sequence.cb_fn = cb_fn;
set->u.sequence.cb_arg = cb_arg;
channel->dev->write_zeroes(channel->dev, channel->dev_channel, lba, lba_count,
&set->cb_args);
}
void
spdk_bs_sequence_finish(spdk_bs_sequence_t *seq, int bserrno)
{
@ -342,6 +359,20 @@ spdk_bs_batch_unmap(spdk_bs_batch_t *batch,
&set->cb_args);
}
void
spdk_bs_batch_write_zeroes(spdk_bs_batch_t *batch,
uint64_t lba, uint32_t lba_count)
{
struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)batch;
struct spdk_bs_channel *channel = set->channel;
SPDK_DEBUGLOG(SPDK_TRACE_BLOB_RW, "Zeroing %u blocks at LBA %lu\n", lba_count, lba);
set->u.batch.outstanding_ops++;
channel->dev->write_zeroes(channel->dev, channel->dev_channel, lba, lba_count,
&set->cb_args);
}
void
spdk_bs_batch_close(spdk_bs_batch_t *batch)
{

View File

@ -155,6 +155,10 @@ void spdk_bs_sequence_unmap(spdk_bs_sequence_t *seq,
uint64_t lba, uint32_t lba_count,
spdk_bs_sequence_cpl cb_fn, void *cb_arg);
void spdk_bs_sequence_write_zeroes(spdk_bs_sequence_t *seq,
uint64_t lba, uint32_t lba_count,
spdk_bs_sequence_cpl cb_fn, void *cb_arg);
void spdk_bs_sequence_finish(spdk_bs_sequence_t *seq, int bserrno);
spdk_bs_batch_t *spdk_bs_batch_open(struct spdk_io_channel *channel,
@ -171,6 +175,9 @@ void spdk_bs_batch_flush(spdk_bs_batch_t *batch);
void spdk_bs_batch_unmap(spdk_bs_batch_t *batch,
uint64_t lba, uint32_t lba_count);
void spdk_bs_batch_write_zeroes(spdk_bs_batch_t *batch,
uint64_t lba, uint32_t lba_count);
void spdk_bs_batch_close(spdk_bs_batch_t *batch);
spdk_bs_batch_t *spdk_bs_sequence_to_batch(spdk_bs_sequence_t *seq,

View File

@ -151,6 +151,19 @@ dev_unmap(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
{
uint64_t offset, length;
offset = lba * DEV_BUFFER_BLOCKLEN;
length = lba_count * DEV_BUFFER_BLOCKLEN;
SPDK_CU_ASSERT_FATAL(offset + length <= DEV_BUFFER_SIZE);
cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, 0);
}
static void
dev_write_zeroes(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
uint64_t lba, uint32_t lba_count,
struct spdk_bs_dev_cb_args *cb_args)
{
uint64_t offset, length;
offset = lba * DEV_BUFFER_BLOCKLEN;
length = lba_count * DEV_BUFFER_BLOCKLEN;
SPDK_CU_ASSERT_FATAL(offset + length <= DEV_BUFFER_SIZE);
@ -174,6 +187,7 @@ init_dev(void)
dev->writev = dev_writev;
dev->flush = dev_flush;
dev->unmap = dev_unmap;
dev->write_zeroes = dev_write_zeroes;
dev->blockcnt = DEV_BUFFER_BLOCKCNT;
dev->blocklen = DEV_BUFFER_BLOCKLEN;