blobfs: process one set_xattr at a time

During RocksDB testing with MySQL, we found cases
where blobfs would try to update and sync the length
xattr on the underlying blob while an existing update
was already in progress.  This was primarily driven
by RocksDB performing appends and syncs on the log
file from multiple writer threads.

The simplest way to fix this is to just process one
sync_request at a time.  There could be a tiny bit
of inefficiency here if multiple threads are appending
and syncing a file in parallel - we can look at some
additional optimizations if we find a case where that
is noticeable.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Reported-by: Changpeng Liu <changpeng.liu@intel.com>
Tested-by: Changpeng Liu <changpeng.liu@intel.com>
Change-Id: I7ab7814494d365bae8716efd0b828337286cc7b7

Reviewed-on: https://review.gerrithub.io/369490
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
This commit is contained in:
Jim Harris 2017-07-12 15:49:24 -07:00
parent 86538ffa24
commit a6014eb2ad

View File

@ -166,6 +166,7 @@ struct spdk_fs_cb_args {
struct {
uint64_t offset;
TAILQ_ENTRY(spdk_fs_request) tailq;
bool xattr_in_progress;
} sync;
struct {
uint32_t num_clusters;
@ -1647,6 +1648,8 @@ __wake_caller(struct spdk_fs_cb_args *args)
sem_post(args->sem);
}
static void __check_sync_reqs(struct spdk_file *file);
static void
__file_cache_finish_sync(struct spdk_file *file)
{
@ -1654,19 +1657,18 @@ __file_cache_finish_sync(struct spdk_file *file)
struct spdk_fs_cb_args *sync_args;
pthread_spin_lock(&file->lock);
while (!TAILQ_EMPTY(&file->sync_requests)) {
sync_req = TAILQ_FIRST(&file->sync_requests);
sync_args = &sync_req->args;
if (sync_args->op.sync.offset > file->length_flushed) {
break;
}
BLOBFS_TRACE(file, "sync done offset=%jx\n", sync_args->op.sync.offset);
TAILQ_REMOVE(&file->sync_requests, sync_req, args.op.sync.tailq);
pthread_spin_unlock(&file->lock);
sync_args->fn.file_op(sync_args->arg, 0);
pthread_spin_lock(&file->lock);
free_fs_request(sync_req);
}
sync_req = TAILQ_FIRST(&file->sync_requests);
sync_args = &sync_req->args;
assert(sync_args->op.sync.offset <= file->length_flushed);
BLOBFS_TRACE(file, "sync done offset=%jx\n", sync_args->op.sync.offset);
TAILQ_REMOVE(&file->sync_requests, sync_req, args.op.sync.tailq);
pthread_spin_unlock(&file->lock);
sync_args->fn.file_op(sync_args->arg, 0);
__check_sync_reqs(file);
pthread_spin_lock(&file->lock);
free_fs_request(sync_req);
pthread_spin_unlock(&file->lock);
}
@ -1692,11 +1694,36 @@ __free_args(struct spdk_fs_cb_args *args)
}
}
/*
 * Look for the first pending sync request whose offset is already
 * covered by file->length_flushed.  If one exists and no "length"
 * xattr update is currently outstanding for it, start exactly one
 * xattr update + blob metadata sync.  Serializing the updates this
 * way guarantees only one set_xattr/sync is in flight per file.
 */
static void
__check_sync_reqs(struct spdk_file *file)
{
	struct spdk_fs_request *req;

	pthread_spin_lock(&file->lock);

	/* Walk the queue until a request covered by the flushed length is found. */
	for (req = TAILQ_FIRST(&file->sync_requests); req != NULL;
	     req = TAILQ_NEXT(req, args.op.sync.tailq)) {
		if (req->args.op.sync.offset <= file->length_flushed) {
			break;
		}
	}

	if (req == NULL || req->args.op.sync.xattr_in_progress) {
		/* Nothing ready yet, or an xattr update is already in flight. */
		pthread_spin_unlock(&file->lock);
		return;
	}

	BLOBFS_TRACE(file, "set xattr length 0x%jx\n", file->length_flushed);
	req->args.op.sync.xattr_in_progress = true;
	spdk_blob_md_set_xattr(file->blob, "length", &file->length_flushed,
			       sizeof(file->length_flushed));
	pthread_spin_unlock(&file->lock);

	/* Issue the metadata sync outside the spinlock. */
	spdk_bs_md_sync_blob(file->blob, __file_cache_finish_sync_bs_cb, file);
}
static void
__file_flush_done(void *arg, int bserrno)
{
struct spdk_fs_cb_args *args = arg;
struct spdk_fs_request *sync_req;
struct spdk_file *file = args->file;
struct cache_buffer *next = args->op.flush.cache_buffer;
@ -1714,12 +1741,6 @@ __file_flush_done(void *arg, int bserrno)
next = spdk_tree_find_buffer(file->tree, file->length_flushed);
}
TAILQ_FOREACH_REVERSE(sync_req, &file->sync_requests, sync_requests_head, args.op.sync.tailq) {
if (sync_req->args.op.sync.offset <= file->length_flushed) {
break;
}
}
/*
* Assert that there is no cached data that extends past the end of the underlying
* blob.
@ -1727,17 +1748,9 @@ __file_flush_done(void *arg, int bserrno)
assert(next == NULL || next->offset < __file_get_blob_size(file) ||
next->bytes_filled == 0);
if (sync_req != NULL) {
BLOBFS_TRACE(file, "set xattr length 0x%jx\n", file->length_flushed);
spdk_blob_md_set_xattr(file->blob, "length", &file->length_flushed,
sizeof(file->length_flushed));
pthread_spin_unlock(&file->lock);
pthread_spin_unlock(&file->lock);
spdk_bs_md_sync_blob(file->blob, __file_cache_finish_sync_bs_cb, file);
} else {
pthread_spin_unlock(&file->lock);
__file_cache_finish_sync(file);
}
__check_sync_reqs(file);
__file_flush(args);
}
@ -2153,6 +2166,7 @@ _file_sync(struct spdk_file *file, struct spdk_fs_channel *channel,
sync_args->fn.file_op = cb_fn;
sync_args->arg = cb_arg;
sync_args->op.sync.offset = file->append_pos;
sync_args->op.sync.xattr_in_progress = false;
TAILQ_INSERT_TAIL(&file->sync_requests, sync_req, args.op.sync.tailq);
pthread_spin_unlock(&file->lock);