numam-spdk/lib/blobfs/blobfs.c
Jim Harris cdd089a8c5 blobfs: don't flush a partial buffer with no sync pending
We flush a cache buffer once it's filled.  When the write
for that cache buffer has completed, we look to see if
there's more data to flush.  Currently if there's *any*
more data to flush, we will flush it, even if it's not
a full buffer.

That can hurt performance though.  Ideally we only want
to flush a partial buffer if there's been an explicit
sync operation that requires that partial buffer to be
flushed.  Otherwise we will end up writing the partial
buffer to disk, and then come back and write that data
again later when the buffer is full.
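
For reference, a rough sketch of the resulting check in __file_flush()
(shown in full further down in this file) - a partially filled buffer
is now skipped unless a sync request is queued:

    if (next == NULL || next->in_progress ||
        ((next->bytes_filled < next->buf_size) &&
         TAILQ_EMPTY(&file->sync_requests))) {
            /* nothing to flush right now */
            return;
    }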

Add a new unit test for this condition.  This patch
breaks one of the existing unit tests, which was
designed specifically around a RocksDB failure condition.
Change that file_length unit test to write exactly
one CACHE_BUFFER, which still tests the general logic
making sure that we don't confuse the amount of data
flushed with the value written to the file's length
xattr.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I83795fb45afe854b38648d0e0c1a7928219307a2

Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/455698
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Ziye Yang <ziye.yang@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
2019-05-27 06:45:41 +00:00

/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "spdk/stdinc.h"
#include "spdk/blobfs.h"
#include "spdk/conf.h"
#include "blobfs_internal.h"
#include "spdk/queue.h"
#include "spdk/thread.h"
#include "spdk/assert.h"
#include "spdk/env.h"
#include "spdk/util.h"
#include "spdk_internal/log.h"
#include "spdk/trace.h"
#define BLOBFS_TRACE(file, str, args...) \
SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s " str, file->name, ##args)
#define BLOBFS_TRACE_RW(file, str, args...) \
SPDK_DEBUGLOG(SPDK_LOG_BLOBFS_RW, "file=%s " str, file->name, ##args)
#define BLOBFS_DEFAULT_CACHE_SIZE (4ULL * 1024 * 1024 * 1024)
#define SPDK_BLOBFS_DEFAULT_OPTS_CLUSTER_SZ (1024 * 1024)
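/*
 * Cache state shared by all loaded filesystems: a single global buffer
 * mempool plus a list of files that currently have cached data, protected
 * by g_caches_lock and walked when buffers need to be evicted.
 */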
static uint64_t g_fs_cache_size = BLOBFS_DEFAULT_CACHE_SIZE;
static struct spdk_mempool *g_cache_pool;
static TAILQ_HEAD(, spdk_file) g_caches;
static int g_fs_count = 0;
static pthread_mutex_t g_cache_init_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_spinlock_t g_caches_lock;
#define TRACE_GROUP_BLOBFS 0x7
#define TRACE_BLOBFS_XATTR_START SPDK_TPOINT_ID(TRACE_GROUP_BLOBFS, 0x0)
#define TRACE_BLOBFS_XATTR_END SPDK_TPOINT_ID(TRACE_GROUP_BLOBFS, 0x1)
#define TRACE_BLOBFS_OPEN SPDK_TPOINT_ID(TRACE_GROUP_BLOBFS, 0x2)
#define TRACE_BLOBFS_CLOSE SPDK_TPOINT_ID(TRACE_GROUP_BLOBFS, 0x3)
#define TRACE_BLOBFS_DELETE_START SPDK_TPOINT_ID(TRACE_GROUP_BLOBFS, 0x4)
#define TRACE_BLOBFS_DELETE_DONE SPDK_TPOINT_ID(TRACE_GROUP_BLOBFS, 0x5)
SPDK_TRACE_REGISTER_FN(blobfs_trace, "blobfs", TRACE_GROUP_BLOBFS)
{
spdk_trace_register_description("BLOBFS_XATTR_START",
TRACE_BLOBFS_XATTR_START,
OWNER_NONE, OBJECT_NONE, 0,
SPDK_TRACE_ARG_TYPE_STR,
"file: ");
spdk_trace_register_description("BLOBFS_XATTR_END",
TRACE_BLOBFS_XATTR_END,
OWNER_NONE, OBJECT_NONE, 0,
SPDK_TRACE_ARG_TYPE_STR,
"file: ");
spdk_trace_register_description("BLOBFS_OPEN",
TRACE_BLOBFS_OPEN,
OWNER_NONE, OBJECT_NONE, 0,
SPDK_TRACE_ARG_TYPE_STR,
"file: ");
spdk_trace_register_description("BLOBFS_CLOSE",
TRACE_BLOBFS_CLOSE,
OWNER_NONE, OBJECT_NONE, 0,
SPDK_TRACE_ARG_TYPE_STR,
"file: ");
spdk_trace_register_description("BLOBFS_DELETE_START",
TRACE_BLOBFS_DELETE_START,
OWNER_NONE, OBJECT_NONE, 0,
SPDK_TRACE_ARG_TYPE_STR,
"file: ");
spdk_trace_register_description("BLOBFS_DELETE_DONE",
TRACE_BLOBFS_DELETE_DONE,
OWNER_NONE, OBJECT_NONE, 0,
SPDK_TRACE_ARG_TYPE_STR,
"file: ");
}
void
spdk_cache_buffer_free(struct cache_buffer *cache_buffer)
{
spdk_mempool_put(g_cache_pool, cache_buffer->buf);
free(cache_buffer);
}
#define CACHE_READAHEAD_THRESHOLD (128 * 1024)
struct spdk_file {
struct spdk_filesystem *fs;
struct spdk_blob *blob;
char *name;
uint64_t trace_arg_name;
uint64_t length;
bool is_deleted;
bool open_for_writing;
uint64_t length_flushed;
uint64_t length_xattr;
uint64_t append_pos;
uint64_t seq_byte_count;
uint64_t next_seq_offset;
uint32_t priority;
TAILQ_ENTRY(spdk_file) tailq;
spdk_blob_id blobid;
uint32_t ref_count;
pthread_spinlock_t lock;
struct cache_buffer *last;
struct cache_tree *tree;
TAILQ_HEAD(open_requests_head, spdk_fs_request) open_requests;
TAILQ_HEAD(sync_requests_head, spdk_fs_request) sync_requests;
TAILQ_ENTRY(spdk_file) cache_tailq;
};
struct spdk_deleted_file {
spdk_blob_id id;
TAILQ_ENTRY(spdk_deleted_file) tailq;
};
struct spdk_filesystem {
struct spdk_blob_store *bs;
TAILQ_HEAD(, spdk_file) files;
struct spdk_bs_opts bs_opts;
struct spdk_bs_dev *bdev;
fs_send_request_fn send_request;
struct {
uint32_t max_ops;
struct spdk_io_channel *sync_io_channel;
struct spdk_fs_channel *sync_fs_channel;
} sync_target;
struct {
uint32_t max_ops;
struct spdk_io_channel *md_io_channel;
struct spdk_fs_channel *md_fs_channel;
} md_target;
struct {
uint32_t max_ops;
} io_target;
};
struct spdk_fs_cb_args {
union {
spdk_fs_op_with_handle_complete fs_op_with_handle;
spdk_fs_op_complete fs_op;
spdk_file_op_with_handle_complete file_op_with_handle;
spdk_file_op_complete file_op;
spdk_file_stat_op_complete stat_op;
} fn;
void *arg;
sem_t *sem;
struct spdk_filesystem *fs;
struct spdk_file *file;
int rc;
struct iovec *iovs;
uint32_t iovcnt;
struct iovec iov;
union {
struct {
TAILQ_HEAD(, spdk_deleted_file) deleted_files;
} fs_load;
struct {
uint64_t length;
} truncate;
struct {
struct spdk_io_channel *channel;
void *pin_buf;
int is_read;
off_t offset;
size_t length;
uint64_t start_lba;
uint64_t num_lba;
uint32_t blocklen;
} rw;
struct {
const char *old_name;
const char *new_name;
} rename;
struct {
struct cache_buffer *cache_buffer;
uint64_t length;
} flush;
struct {
struct cache_buffer *cache_buffer;
uint64_t length;
uint64_t offset;
} readahead;
struct {
/* offset of the file when the sync request was made */
uint64_t offset;
TAILQ_ENTRY(spdk_fs_request) tailq;
bool xattr_in_progress;
/* length written to the xattr for this file - this should
* always be the same as the offset if only one thread is
* writing to the file, but could differ if multiple threads
* are appending
*/
uint64_t length;
} sync;
struct {
uint32_t num_clusters;
} resize;
struct {
const char *name;
uint32_t flags;
TAILQ_ENTRY(spdk_fs_request) tailq;
} open;
struct {
const char *name;
struct spdk_blob *blob;
} create;
struct {
const char *name;
} delete;
struct {
const char *name;
} stat;
} op;
};
static void cache_free_buffers(struct spdk_file *file);
static void spdk_fs_io_device_unregister(struct spdk_filesystem *fs);
static void spdk_fs_free_io_channels(struct spdk_filesystem *fs);
void
spdk_fs_opts_init(struct spdk_blobfs_opts *opts)
{
opts->cluster_sz = SPDK_BLOBFS_DEFAULT_OPTS_CLUSTER_SZ;
}
static void
__initialize_cache(void)
{
assert(g_cache_pool == NULL);
g_cache_pool = spdk_mempool_create("spdk_fs_cache",
g_fs_cache_size / CACHE_BUFFER_SIZE,
CACHE_BUFFER_SIZE,
SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
SPDK_ENV_SOCKET_ID_ANY);
if (!g_cache_pool) {
SPDK_ERRLOG("Creating the cache mempool failed; you may need to "
"increase available memory and try again\n");
assert(false);
}
TAILQ_INIT(&g_caches);
pthread_spin_init(&g_caches_lock, 0);
}
static void
__free_cache(void)
{
assert(g_cache_pool != NULL);
spdk_mempool_free(g_cache_pool);
g_cache_pool = NULL;
}
static uint64_t
__file_get_blob_size(struct spdk_file *file)
{
uint64_t cluster_sz;
cluster_sz = file->fs->bs_opts.cluster_sz;
return cluster_sz * spdk_blob_get_num_clusters(file->blob);
}
struct spdk_fs_request {
struct spdk_fs_cb_args args;
TAILQ_ENTRY(spdk_fs_request) link;
struct spdk_fs_channel *channel;
};
struct spdk_fs_channel {
struct spdk_fs_request *req_mem;
TAILQ_HEAD(, spdk_fs_request) reqs;
sem_t sem;
struct spdk_filesystem *fs;
struct spdk_io_channel *bs_channel;
fs_send_request_fn send_request;
bool sync;
uint32_t outstanding_reqs;
pthread_spinlock_t lock;
};
/* For now, this is effectively an alias. But eventually we'll shift
* some data members over. */
struct spdk_fs_thread_ctx {
struct spdk_fs_channel ch;
};
static struct spdk_fs_request *
alloc_fs_request_with_iov(struct spdk_fs_channel *channel, uint32_t iovcnt)
{
struct spdk_fs_request *req;
struct iovec *iovs = NULL;
if (iovcnt > 1) {
iovs = calloc(iovcnt, sizeof(struct iovec));
if (!iovs) {
return NULL;
}
}
if (channel->sync) {
pthread_spin_lock(&channel->lock);
}
req = TAILQ_FIRST(&channel->reqs);
if (req) {
channel->outstanding_reqs++;
TAILQ_REMOVE(&channel->reqs, req, link);
}
if (channel->sync) {
pthread_spin_unlock(&channel->lock);
}
if (req == NULL) {
SPDK_ERRLOG("Cannot allocate req on spdk_fs_channel=%p\n", channel);
free(iovs);
return NULL;
}
memset(req, 0, sizeof(*req));
req->channel = channel;
if (iovcnt > 1) {
req->args.iovs = iovs;
} else {
req->args.iovs = &req->args.iov;
}
req->args.iovcnt = iovcnt;
return req;
}
static struct spdk_fs_request *
alloc_fs_request(struct spdk_fs_channel *channel)
{
return alloc_fs_request_with_iov(channel, 0);
}
static void
free_fs_request(struct spdk_fs_request *req)
{
struct spdk_fs_channel *channel = req->channel;
if (req->args.iovcnt > 1) {
free(req->args.iovs);
}
if (channel->sync) {
pthread_spin_lock(&channel->lock);
}
TAILQ_INSERT_HEAD(&req->channel->reqs, req, link);
channel->outstanding_reqs--;
if (channel->sync) {
pthread_spin_unlock(&channel->lock);
}
}
static int
_spdk_fs_channel_create(struct spdk_filesystem *fs, struct spdk_fs_channel *channel,
uint32_t max_ops)
{
uint32_t i;
channel->req_mem = calloc(max_ops, sizeof(struct spdk_fs_request));
if (!channel->req_mem) {
return -1;
}
channel->outstanding_reqs = 0;
TAILQ_INIT(&channel->reqs);
sem_init(&channel->sem, 0, 0);
for (i = 0; i < max_ops; i++) {
TAILQ_INSERT_TAIL(&channel->reqs, &channel->req_mem[i], link);
}
channel->fs = fs;
return 0;
}
static int
_spdk_fs_md_channel_create(void *io_device, void *ctx_buf)
{
struct spdk_filesystem *fs;
struct spdk_fs_channel *channel = ctx_buf;
fs = SPDK_CONTAINEROF(io_device, struct spdk_filesystem, md_target);
return _spdk_fs_channel_create(fs, channel, fs->md_target.max_ops);
}
static int
_spdk_fs_sync_channel_create(void *io_device, void *ctx_buf)
{
struct spdk_filesystem *fs;
struct spdk_fs_channel *channel = ctx_buf;
fs = SPDK_CONTAINEROF(io_device, struct spdk_filesystem, sync_target);
return _spdk_fs_channel_create(fs, channel, fs->sync_target.max_ops);
}
static int
_spdk_fs_io_channel_create(void *io_device, void *ctx_buf)
{
struct spdk_filesystem *fs;
struct spdk_fs_channel *channel = ctx_buf;
fs = SPDK_CONTAINEROF(io_device, struct spdk_filesystem, io_target);
return _spdk_fs_channel_create(fs, channel, fs->io_target.max_ops);
}
static void
_spdk_fs_channel_destroy(void *io_device, void *ctx_buf)
{
struct spdk_fs_channel *channel = ctx_buf;
if (channel->outstanding_reqs > 0) {
SPDK_ERRLOG("channel freed with %" PRIu32 " outstanding requests!\n",
channel->outstanding_reqs);
}
free(channel->req_mem);
if (channel->bs_channel != NULL) {
spdk_bs_free_io_channel(channel->bs_channel);
}
}
static void
__send_request_direct(fs_request_fn fn, void *arg)
{
fn(arg);
}
static void
common_fs_bs_init(struct spdk_filesystem *fs, struct spdk_blob_store *bs)
{
fs->bs = bs;
fs->bs_opts.cluster_sz = spdk_bs_get_cluster_size(bs);
fs->md_target.md_fs_channel->bs_channel = spdk_bs_alloc_io_channel(fs->bs);
fs->md_target.md_fs_channel->send_request = __send_request_direct;
fs->sync_target.sync_fs_channel->bs_channel = spdk_bs_alloc_io_channel(fs->bs);
fs->sync_target.sync_fs_channel->send_request = __send_request_direct;
pthread_mutex_lock(&g_cache_init_lock);
if (g_fs_count == 0) {
__initialize_cache();
}
g_fs_count++;
pthread_mutex_unlock(&g_cache_init_lock);
}
static void
init_cb(void *ctx, struct spdk_blob_store *bs, int bserrno)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
struct spdk_filesystem *fs = args->fs;
if (bserrno == 0) {
common_fs_bs_init(fs, bs);
} else {
free(fs);
fs = NULL;
}
args->fn.fs_op_with_handle(args->arg, fs, bserrno);
free_fs_request(req);
}
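/*
 * Read the optional CacheBufferShift value from the [Blobfs] section of the
 * config file; fall back to the default if the section or value is missing
 * or invalid.
 */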
static void
fs_conf_parse(void)
{
struct spdk_conf_section *sp;
sp = spdk_conf_find_section(NULL, "Blobfs");
if (sp == NULL) {
g_fs_cache_buffer_shift = CACHE_BUFFER_SHIFT_DEFAULT;
return;
}
g_fs_cache_buffer_shift = spdk_conf_section_get_intval(sp, "CacheBufferShift");
if (g_fs_cache_buffer_shift <= 0) {
g_fs_cache_buffer_shift = CACHE_BUFFER_SHIFT_DEFAULT;
}
}
static struct spdk_filesystem *
fs_alloc(struct spdk_bs_dev *dev, fs_send_request_fn send_request_fn)
{
struct spdk_filesystem *fs;
fs = calloc(1, sizeof(*fs));
if (fs == NULL) {
return NULL;
}
fs->bdev = dev;
fs->send_request = send_request_fn;
TAILQ_INIT(&fs->files);
fs->md_target.max_ops = 512;
spdk_io_device_register(&fs->md_target, _spdk_fs_md_channel_create, _spdk_fs_channel_destroy,
sizeof(struct spdk_fs_channel), "blobfs_md");
fs->md_target.md_io_channel = spdk_get_io_channel(&fs->md_target);
fs->md_target.md_fs_channel = spdk_io_channel_get_ctx(fs->md_target.md_io_channel);
fs->sync_target.max_ops = 512;
spdk_io_device_register(&fs->sync_target, _spdk_fs_sync_channel_create, _spdk_fs_channel_destroy,
sizeof(struct spdk_fs_channel), "blobfs_sync");
fs->sync_target.sync_io_channel = spdk_get_io_channel(&fs->sync_target);
fs->sync_target.sync_fs_channel = spdk_io_channel_get_ctx(fs->sync_target.sync_io_channel);
fs->io_target.max_ops = 512;
spdk_io_device_register(&fs->io_target, _spdk_fs_io_channel_create, _spdk_fs_channel_destroy,
sizeof(struct spdk_fs_channel), "blobfs_io");
return fs;
}
static void
__wake_caller(void *arg, int fserrno)
{
struct spdk_fs_cb_args *args = arg;
args->rc = fserrno;
sem_post(args->sem);
}
void
spdk_fs_init(struct spdk_bs_dev *dev, struct spdk_blobfs_opts *opt,
fs_send_request_fn send_request_fn,
spdk_fs_op_with_handle_complete cb_fn, void *cb_arg)
{
struct spdk_filesystem *fs;
struct spdk_fs_request *req;
struct spdk_fs_cb_args *args;
struct spdk_bs_opts opts = {};
fs = fs_alloc(dev, send_request_fn);
if (fs == NULL) {
cb_fn(cb_arg, NULL, -ENOMEM);
return;
}
fs_conf_parse();
req = alloc_fs_request(fs->md_target.md_fs_channel);
if (req == NULL) {
spdk_fs_free_io_channels(fs);
spdk_fs_io_device_unregister(fs);
cb_fn(cb_arg, NULL, -ENOMEM);
return;
}
args = &req->args;
args->fn.fs_op_with_handle = cb_fn;
args->arg = cb_arg;
args->fs = fs;
spdk_bs_opts_init(&opts);
snprintf(opts.bstype.bstype, sizeof(opts.bstype.bstype), "BLOBFS");
if (opt) {
opts.cluster_sz = opt->cluster_sz;
}
spdk_bs_init(dev, &opts, init_cb, req);
}
static struct spdk_file *
file_alloc(struct spdk_filesystem *fs)
{
struct spdk_file *file;
file = calloc(1, sizeof(*file));
if (file == NULL) {
return NULL;
}
file->tree = calloc(1, sizeof(*file->tree));
if (file->tree == NULL) {
free(file);
return NULL;
}
file->fs = fs;
TAILQ_INIT(&file->open_requests);
TAILQ_INIT(&file->sync_requests);
pthread_spin_init(&file->lock, 0);
TAILQ_INSERT_TAIL(&fs->files, file, tailq);
file->priority = SPDK_FILE_PRIORITY_LOW;
return file;
}
static void fs_load_done(void *ctx, int bserrno);
static int
_handle_deleted_files(struct spdk_fs_request *req)
{
struct spdk_fs_cb_args *args = &req->args;
struct spdk_filesystem *fs = args->fs;
if (!TAILQ_EMPTY(&args->op.fs_load.deleted_files)) {
struct spdk_deleted_file *deleted_file;
deleted_file = TAILQ_FIRST(&args->op.fs_load.deleted_files);
TAILQ_REMOVE(&args->op.fs_load.deleted_files, deleted_file, tailq);
spdk_bs_delete_blob(fs->bs, deleted_file->id, fs_load_done, req);
free(deleted_file);
return 0;
}
return 1;
}
static void
fs_load_done(void *ctx, int bserrno)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
struct spdk_filesystem *fs = args->fs;
/* The filesystem has been loaded. Now check if there are any files that
 * were marked for deletion before the last unload. Do not complete the
 * fs_load callback until all of them have been deleted on disk.
 */
if (_handle_deleted_files(req) == 0) {
/* We found a file that's been marked for deleting but not actually
* deleted yet. This function will get called again once the delete
* operation is completed.
*/
return;
}
args->fn.fs_op_with_handle(args->arg, fs, 0);
free_fs_request(req);
}
static void
_file_build_trace_arg_name(struct spdk_file *f)
{
f->trace_arg_name = 0;
memcpy(&f->trace_arg_name, f->name,
spdk_min(sizeof(f->trace_arg_name), strlen(f->name)));
}
static void
iter_cb(void *ctx, struct spdk_blob *blob, int rc)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
struct spdk_filesystem *fs = args->fs;
uint64_t *length;
const char *name;
uint32_t *is_deleted;
size_t value_len;
if (rc < 0) {
args->fn.fs_op_with_handle(args->arg, fs, rc);
free_fs_request(req);
return;
}
rc = spdk_blob_get_xattr_value(blob, "name", (const void **)&name, &value_len);
if (rc < 0) {
args->fn.fs_op_with_handle(args->arg, fs, rc);
free_fs_request(req);
return;
}
rc = spdk_blob_get_xattr_value(blob, "length", (const void **)&length, &value_len);
if (rc < 0) {
args->fn.fs_op_with_handle(args->arg, fs, rc);
free_fs_request(req);
return;
}
assert(value_len == 8);
/* This file was deleted but not closed before the app last exited (e.g. it crashed), so delete it now */
rc = spdk_blob_get_xattr_value(blob, "is_deleted", (const void **)&is_deleted, &value_len);
if (rc < 0) {
struct spdk_file *f;
f = file_alloc(fs);
if (f == NULL) {
args->fn.fs_op_with_handle(args->arg, fs, -ENOMEM);
free_fs_request(req);
return;
}
f->name = strdup(name);
_file_build_trace_arg_name(f);
f->blobid = spdk_blob_get_id(blob);
f->length = *length;
f->length_flushed = *length;
f->length_xattr = *length;
f->append_pos = *length;
SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "added file %s length=%ju\n", f->name, f->length);
} else {
struct spdk_deleted_file *deleted_file;
deleted_file = calloc(1, sizeof(*deleted_file));
if (deleted_file == NULL) {
args->fn.fs_op_with_handle(args->arg, fs, -ENOMEM);
free_fs_request(req);
return;
}
deleted_file->id = spdk_blob_get_id(blob);
TAILQ_INSERT_TAIL(&args->op.fs_load.deleted_files, deleted_file, tailq);
}
}
static void
load_cb(void *ctx, struct spdk_blob_store *bs, int bserrno)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
struct spdk_filesystem *fs = args->fs;
struct spdk_bs_type bstype;
static const struct spdk_bs_type blobfs_type = {"BLOBFS"};
static const struct spdk_bs_type zeros;
if (bserrno != 0) {
args->fn.fs_op_with_handle(args->arg, NULL, bserrno);
free_fs_request(req);
free(fs);
return;
}
bstype = spdk_bs_get_bstype(bs);
if (!memcmp(&bstype, &zeros, sizeof(bstype))) {
SPDK_DEBUGLOG(SPDK_LOG_BLOB, "assigning bstype\n");
spdk_bs_set_bstype(bs, blobfs_type);
} else if (memcmp(&bstype, &blobfs_type, sizeof(bstype))) {
SPDK_DEBUGLOG(SPDK_LOG_BLOB, "not blobfs\n");
SPDK_LOGDUMP(SPDK_LOG_BLOB, "bstype", &bstype, sizeof(bstype));
args->fn.fs_op_with_handle(args->arg, NULL, bserrno);
free_fs_request(req);
free(fs);
return;
}
common_fs_bs_init(fs, bs);
fs_load_done(req, 0);
}
static void
spdk_fs_io_device_unregister(struct spdk_filesystem *fs)
{
assert(fs != NULL);
spdk_io_device_unregister(&fs->md_target, NULL);
spdk_io_device_unregister(&fs->sync_target, NULL);
spdk_io_device_unregister(&fs->io_target, NULL);
free(fs);
}
static void
spdk_fs_free_io_channels(struct spdk_filesystem *fs)
{
assert(fs != NULL);
spdk_fs_free_io_channel(fs->md_target.md_io_channel);
spdk_fs_free_io_channel(fs->sync_target.sync_io_channel);
}
void
spdk_fs_load(struct spdk_bs_dev *dev, fs_send_request_fn send_request_fn,
spdk_fs_op_with_handle_complete cb_fn, void *cb_arg)
{
struct spdk_filesystem *fs;
struct spdk_fs_cb_args *args;
struct spdk_fs_request *req;
struct spdk_bs_opts bs_opts;
fs = fs_alloc(dev, send_request_fn);
if (fs == NULL) {
cb_fn(cb_arg, NULL, -ENOMEM);
return;
}
fs_conf_parse();
req = alloc_fs_request(fs->md_target.md_fs_channel);
if (req == NULL) {
spdk_fs_free_io_channels(fs);
spdk_fs_io_device_unregister(fs);
cb_fn(cb_arg, NULL, -ENOMEM);
return;
}
args = &req->args;
args->fn.fs_op_with_handle = cb_fn;
args->arg = cb_arg;
args->fs = fs;
TAILQ_INIT(&args->op.fs_load.deleted_files);
spdk_bs_opts_init(&bs_opts);
bs_opts.iter_cb_fn = iter_cb;
bs_opts.iter_cb_arg = req;
spdk_bs_load(dev, &bs_opts, load_cb, req);
}
static void
unload_cb(void *ctx, int bserrno)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
struct spdk_filesystem *fs = args->fs;
struct spdk_file *file, *tmp;
TAILQ_FOREACH_SAFE(file, &fs->files, tailq, tmp) {
TAILQ_REMOVE(&fs->files, file, tailq);
cache_free_buffers(file);
free(file->name);
free(file->tree);
free(file);
}
pthread_mutex_lock(&g_cache_init_lock);
g_fs_count--;
if (g_fs_count == 0) {
__free_cache();
}
pthread_mutex_unlock(&g_cache_init_lock);
args->fn.fs_op(args->arg, bserrno);
free(req);
spdk_fs_io_device_unregister(fs);
}
void
spdk_fs_unload(struct spdk_filesystem *fs, spdk_fs_op_complete cb_fn, void *cb_arg)
{
struct spdk_fs_request *req;
struct spdk_fs_cb_args *args;
/*
* We must free the md_channel before unloading the blobstore, so just
* allocate this request from the general heap.
*/
req = calloc(1, sizeof(*req));
if (req == NULL) {
cb_fn(cb_arg, -ENOMEM);
return;
}
args = &req->args;
args->fn.fs_op = cb_fn;
args->arg = cb_arg;
args->fs = fs;
spdk_fs_free_io_channels(fs);
spdk_bs_unload(fs->bs, unload_cb, req);
}
static struct spdk_file *
fs_find_file(struct spdk_filesystem *fs, const char *name)
{
struct spdk_file *file;
TAILQ_FOREACH(file, &fs->files, tailq) {
if (!strncmp(name, file->name, SPDK_FILE_NAME_MAX)) {
return file;
}
}
return NULL;
}
void
spdk_fs_file_stat_async(struct spdk_filesystem *fs, const char *name,
spdk_file_stat_op_complete cb_fn, void *cb_arg)
{
struct spdk_file_stat stat;
struct spdk_file *f = NULL;
if (strnlen(name, SPDK_FILE_NAME_MAX + 1) == SPDK_FILE_NAME_MAX + 1) {
cb_fn(cb_arg, NULL, -ENAMETOOLONG);
return;
}
f = fs_find_file(fs, name);
if (f != NULL) {
stat.blobid = f->blobid;
stat.size = f->append_pos >= f->length ? f->append_pos : f->length;
cb_fn(cb_arg, &stat, 0);
return;
}
cb_fn(cb_arg, NULL, -ENOENT);
}
static void
__copy_stat(void *arg, struct spdk_file_stat *stat, int fserrno)
{
struct spdk_fs_request *req = arg;
struct spdk_fs_cb_args *args = &req->args;
args->rc = fserrno;
if (fserrno == 0) {
memcpy(args->arg, stat, sizeof(*stat));
}
sem_post(args->sem);
}
static void
__file_stat(void *arg)
{
struct spdk_fs_request *req = arg;
struct spdk_fs_cb_args *args = &req->args;
spdk_fs_file_stat_async(args->fs, args->op.stat.name,
args->fn.stat_op, req);
}
int
spdk_fs_file_stat(struct spdk_filesystem *fs, struct spdk_fs_thread_ctx *ctx,
const char *name, struct spdk_file_stat *stat)
{
struct spdk_fs_channel *channel = (struct spdk_fs_channel *)ctx;
struct spdk_fs_request *req;
int rc;
req = alloc_fs_request(channel);
if (req == NULL) {
return -ENOMEM;
}
req->args.fs = fs;
req->args.op.stat.name = name;
req->args.fn.stat_op = __copy_stat;
req->args.arg = stat;
req->args.sem = &channel->sem;
channel->send_request(__file_stat, req);
sem_wait(&channel->sem);
rc = req->args.rc;
free_fs_request(req);
return rc;
}
static void
fs_create_blob_close_cb(void *ctx, int bserrno)
{
int rc;
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
rc = args->rc ? args->rc : bserrno;
args->fn.file_op(args->arg, rc);
free_fs_request(req);
}
static void
fs_create_blob_resize_cb(void *ctx, int bserrno)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
struct spdk_file *f = args->file;
struct spdk_blob *blob = args->op.create.blob;
uint64_t length = 0;
args->rc = bserrno;
if (bserrno) {
spdk_blob_close(blob, fs_create_blob_close_cb, args);
return;
}
spdk_blob_set_xattr(blob, "name", f->name, strlen(f->name) + 1);
spdk_blob_set_xattr(blob, "length", &length, sizeof(length));
spdk_blob_close(blob, fs_create_blob_close_cb, args);
}
static void
fs_create_blob_open_cb(void *ctx, struct spdk_blob *blob, int bserrno)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
if (bserrno) {
args->fn.file_op(args->arg, bserrno);
free_fs_request(req);
return;
}
args->op.create.blob = blob;
spdk_blob_resize(blob, 1, fs_create_blob_resize_cb, req);
}
static void
fs_create_blob_create_cb(void *ctx, spdk_blob_id blobid, int bserrno)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
struct spdk_file *f = args->file;
if (bserrno) {
args->fn.file_op(args->arg, bserrno);
free_fs_request(req);
return;
}
f->blobid = blobid;
spdk_bs_open_blob(f->fs->bs, blobid, fs_create_blob_open_cb, req);
}
void
spdk_fs_create_file_async(struct spdk_filesystem *fs, const char *name,
spdk_file_op_complete cb_fn, void *cb_arg)
{
struct spdk_file *file;
struct spdk_fs_request *req;
struct spdk_fs_cb_args *args;
if (strnlen(name, SPDK_FILE_NAME_MAX + 1) == SPDK_FILE_NAME_MAX + 1) {
cb_fn(cb_arg, -ENAMETOOLONG);
return;
}
file = fs_find_file(fs, name);
if (file != NULL) {
cb_fn(cb_arg, -EEXIST);
return;
}
file = file_alloc(fs);
if (file == NULL) {
cb_fn(cb_arg, -ENOMEM);
return;
}
req = alloc_fs_request(fs->md_target.md_fs_channel);
if (req == NULL) {
cb_fn(cb_arg, -ENOMEM);
return;
}
args = &req->args;
args->file = file;
args->fn.file_op = cb_fn;
args->arg = cb_arg;
file->name = strdup(name);
_file_build_trace_arg_name(file);
spdk_bs_create_blob(fs->bs, fs_create_blob_create_cb, args);
}
static void
__fs_create_file_done(void *arg, int fserrno)
{
struct spdk_fs_request *req = arg;
struct spdk_fs_cb_args *args = &req->args;
args->rc = fserrno;
sem_post(args->sem);
SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s\n", args->op.create.name);
}
static void
__fs_create_file(void *arg)
{
struct spdk_fs_request *req = arg;
struct spdk_fs_cb_args *args = &req->args;
SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s\n", args->op.create.name);
spdk_fs_create_file_async(args->fs, args->op.create.name, __fs_create_file_done, req);
}
int
spdk_fs_create_file(struct spdk_filesystem *fs, struct spdk_fs_thread_ctx *ctx, const char *name)
{
struct spdk_fs_channel *channel = (struct spdk_fs_channel *)ctx;
struct spdk_fs_request *req;
struct spdk_fs_cb_args *args;
int rc;
SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s\n", name);
req = alloc_fs_request(channel);
if (req == NULL) {
return -ENOMEM;
}
args = &req->args;
args->fs = fs;
args->op.create.name = name;
args->sem = &channel->sem;
fs->send_request(__fs_create_file, req);
sem_wait(&channel->sem);
rc = args->rc;
free_fs_request(req);
return rc;
}
static void
fs_open_blob_done(void *ctx, struct spdk_blob *blob, int bserrno)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
struct spdk_file *f = args->file;
f->blob = blob;
while (!TAILQ_EMPTY(&f->open_requests)) {
req = TAILQ_FIRST(&f->open_requests);
args = &req->args;
TAILQ_REMOVE(&f->open_requests, req, args.op.open.tailq);
spdk_trace_record(TRACE_BLOBFS_OPEN, 0, 0, 0, f->trace_arg_name);
args->fn.file_op_with_handle(args->arg, f, bserrno);
free_fs_request(req);
}
}
static void
fs_open_blob_create_cb(void *ctx, int bserrno)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
struct spdk_file *file = args->file;
struct spdk_filesystem *fs = args->fs;
if (file == NULL) {
/*
* This is from an open with CREATE flag - the file
* is now created so look it up in the file list for this
* filesystem.
*/
file = fs_find_file(fs, args->op.open.name);
assert(file != NULL);
args->file = file;
}
file->ref_count++;
TAILQ_INSERT_TAIL(&file->open_requests, req, args.op.open.tailq);
if (file->ref_count == 1) {
assert(file->blob == NULL);
spdk_bs_open_blob(fs->bs, file->blobid, fs_open_blob_done, req);
} else if (file->blob != NULL) {
fs_open_blob_done(req, file->blob, 0);
} else {
/*
* The blob open for this file is in progress due to a previous
* open request. When that open completes, it will invoke the
* open callback for this request.
*/
}
}
void
spdk_fs_open_file_async(struct spdk_filesystem *fs, const char *name, uint32_t flags,
spdk_file_op_with_handle_complete cb_fn, void *cb_arg)
{
struct spdk_file *f = NULL;
struct spdk_fs_request *req;
struct spdk_fs_cb_args *args;
if (strnlen(name, SPDK_FILE_NAME_MAX + 1) == SPDK_FILE_NAME_MAX + 1) {
cb_fn(cb_arg, NULL, -ENAMETOOLONG);
return;
}
f = fs_find_file(fs, name);
if (f == NULL && !(flags & SPDK_BLOBFS_OPEN_CREATE)) {
cb_fn(cb_arg, NULL, -ENOENT);
return;
}
if (f != NULL && f->is_deleted == true) {
cb_fn(cb_arg, NULL, -ENOENT);
return;
}
req = alloc_fs_request(fs->md_target.md_fs_channel);
if (req == NULL) {
cb_fn(cb_arg, NULL, -ENOMEM);
return;
}
args = &req->args;
args->fn.file_op_with_handle = cb_fn;
args->arg = cb_arg;
args->file = f;
args->fs = fs;
args->op.open.name = name;
if (f == NULL) {
spdk_fs_create_file_async(fs, name, fs_open_blob_create_cb, req);
} else {
fs_open_blob_create_cb(req, 0);
}
}
static void
__fs_open_file_done(void *arg, struct spdk_file *file, int bserrno)
{
struct spdk_fs_request *req = arg;
struct spdk_fs_cb_args *args = &req->args;
args->file = file;
__wake_caller(args, bserrno);
SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s\n", args->op.open.name);
}
static void
__fs_open_file(void *arg)
{
struct spdk_fs_request *req = arg;
struct spdk_fs_cb_args *args = &req->args;
SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s\n", args->op.open.name);
spdk_fs_open_file_async(args->fs, args->op.open.name, args->op.open.flags,
__fs_open_file_done, req);
}
int
spdk_fs_open_file(struct spdk_filesystem *fs, struct spdk_fs_thread_ctx *ctx,
const char *name, uint32_t flags, struct spdk_file **file)
{
struct spdk_fs_channel *channel = (struct spdk_fs_channel *)ctx;
struct spdk_fs_request *req;
struct spdk_fs_cb_args *args;
int rc;
SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s\n", name);
req = alloc_fs_request(channel);
if (req == NULL) {
return -ENOMEM;
}
args = &req->args;
args->fs = fs;
args->op.open.name = name;
args->op.open.flags = flags;
args->sem = &channel->sem;
fs->send_request(__fs_open_file, req);
sem_wait(&channel->sem);
rc = args->rc;
if (rc == 0) {
*file = args->file;
} else {
*file = NULL;
}
free_fs_request(req);
return rc;
}
static void
fs_rename_blob_close_cb(void *ctx, int bserrno)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
args->fn.fs_op(args->arg, bserrno);
free_fs_request(req);
}
static void
fs_rename_blob_open_cb(void *ctx, struct spdk_blob *blob, int bserrno)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
const char *new_name = args->op.rename.new_name;
spdk_blob_set_xattr(blob, "name", new_name, strlen(new_name) + 1);
spdk_blob_close(blob, fs_rename_blob_close_cb, req);
}
static void
__spdk_fs_md_rename_file(struct spdk_fs_request *req)
{
struct spdk_fs_cb_args *args = &req->args;
struct spdk_file *f;
f = fs_find_file(args->fs, args->op.rename.old_name);
if (f == NULL) {
args->fn.fs_op(args->arg, -ENOENT);
free_fs_request(req);
return;
}
free(f->name);
f->name = strdup(args->op.rename.new_name);
_file_build_trace_arg_name(f);
args->file = f;
spdk_bs_open_blob(args->fs->bs, f->blobid, fs_rename_blob_open_cb, req);
}
static void
fs_rename_delete_done(void *arg, int fserrno)
{
__spdk_fs_md_rename_file(arg);
}
void
spdk_fs_rename_file_async(struct spdk_filesystem *fs,
const char *old_name, const char *new_name,
spdk_file_op_complete cb_fn, void *cb_arg)
{
struct spdk_file *f;
struct spdk_fs_request *req;
struct spdk_fs_cb_args *args;
SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "old=%s new=%s\n", old_name, new_name);
if (strnlen(new_name, SPDK_FILE_NAME_MAX + 1) == SPDK_FILE_NAME_MAX + 1) {
cb_fn(cb_arg, -ENAMETOOLONG);
return;
}
req = alloc_fs_request(fs->md_target.md_fs_channel);
if (req == NULL) {
cb_fn(cb_arg, -ENOMEM);
return;
}
args = &req->args;
args->fn.fs_op = cb_fn;
args->fs = fs;
args->arg = cb_arg;
args->op.rename.old_name = old_name;
args->op.rename.new_name = new_name;
f = fs_find_file(fs, new_name);
if (f == NULL) {
__spdk_fs_md_rename_file(req);
return;
}
/*
* The rename overwrites an existing file. So delete the existing file, then
* do the actual rename.
*/
spdk_fs_delete_file_async(fs, new_name, fs_rename_delete_done, req);
}
static void
__fs_rename_file_done(void *arg, int fserrno)
{
struct spdk_fs_request *req = arg;
struct spdk_fs_cb_args *args = &req->args;
__wake_caller(args, fserrno);
}
static void
__fs_rename_file(void *arg)
{
struct spdk_fs_request *req = arg;
struct spdk_fs_cb_args *args = &req->args;
spdk_fs_rename_file_async(args->fs, args->op.rename.old_name, args->op.rename.new_name,
__fs_rename_file_done, req);
}
int
spdk_fs_rename_file(struct spdk_filesystem *fs, struct spdk_fs_thread_ctx *ctx,
const char *old_name, const char *new_name)
{
struct spdk_fs_channel *channel = (struct spdk_fs_channel *)ctx;
struct spdk_fs_request *req;
struct spdk_fs_cb_args *args;
int rc;
req = alloc_fs_request(channel);
if (req == NULL) {
return -ENOMEM;
}
args = &req->args;
args->fs = fs;
args->op.rename.old_name = old_name;
args->op.rename.new_name = new_name;
args->sem = &channel->sem;
fs->send_request(__fs_rename_file, req);
sem_wait(&channel->sem);
rc = args->rc;
free_fs_request(req);
return rc;
}
static void
blob_delete_cb(void *ctx, int bserrno)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
args->fn.file_op(args->arg, bserrno);
free_fs_request(req);
}
void
spdk_fs_delete_file_async(struct spdk_filesystem *fs, const char *name,
spdk_file_op_complete cb_fn, void *cb_arg)
{
struct spdk_file *f;
spdk_blob_id blobid;
struct spdk_fs_request *req;
struct spdk_fs_cb_args *args;
SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s\n", name);
if (strnlen(name, SPDK_FILE_NAME_MAX + 1) == SPDK_FILE_NAME_MAX + 1) {
cb_fn(cb_arg, -ENAMETOOLONG);
return;
}
f = fs_find_file(fs, name);
if (f == NULL) {
SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "Cannot find the file=%s to delete\n", name);
cb_fn(cb_arg, -ENOENT);
return;
}
req = alloc_fs_request(fs->md_target.md_fs_channel);
if (req == NULL) {
SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "Cannot allocate req to delete file=%s\n", name);
cb_fn(cb_arg, -ENOMEM);
return;
}
args = &req->args;
args->fn.file_op = cb_fn;
args->arg = cb_arg;
if (f->ref_count > 0) {
/* If the ref > 0, we mark the file as deleted and delete it when we close it. */
f->is_deleted = true;
spdk_blob_set_xattr(f->blob, "is_deleted", &f->is_deleted, sizeof(bool));
spdk_blob_sync_md(f->blob, blob_delete_cb, req);
return;
}
TAILQ_REMOVE(&fs->files, f, tailq);
cache_free_buffers(f);
blobid = f->blobid;
free(f->name);
free(f->tree);
free(f);
spdk_bs_delete_blob(fs->bs, blobid, blob_delete_cb, req);
}
static uint64_t
fs_name_to_uint64(const char *name)
{
uint64_t result = 0;
memcpy(&result, name, spdk_min(sizeof(result), strlen(name)));
return result;
}
static void
__fs_delete_file_done(void *arg, int fserrno)
{
struct spdk_fs_request *req = arg;
struct spdk_fs_cb_args *args = &req->args;
spdk_trace_record(TRACE_BLOBFS_DELETE_DONE, 0, 0, 0, fs_name_to_uint64(args->op.delete.name));
__wake_caller(args, fserrno);
}
static void
__fs_delete_file(void *arg)
{
struct spdk_fs_request *req = arg;
struct spdk_fs_cb_args *args = &req->args;
spdk_trace_record(TRACE_BLOBFS_DELETE_START, 0, 0, 0, fs_name_to_uint64(args->op.delete.name));
spdk_fs_delete_file_async(args->fs, args->op.delete.name, __fs_delete_file_done, req);
}
int
spdk_fs_delete_file(struct spdk_filesystem *fs, struct spdk_fs_thread_ctx *ctx,
const char *name)
{
struct spdk_fs_channel *channel = (struct spdk_fs_channel *)ctx;
struct spdk_fs_request *req;
struct spdk_fs_cb_args *args;
int rc;
req = alloc_fs_request(channel);
if (req == NULL) {
SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "Cannot allocate req to delete file=%s\n", name);
return -ENOMEM;
}
args = &req->args;
args->fs = fs;
args->op.delete.name = name;
args->sem = &channel->sem;
fs->send_request(__fs_delete_file, req);
sem_wait(&channel->sem);
rc = args->rc;
free_fs_request(req);
return rc;
}
spdk_fs_iter
spdk_fs_iter_first(struct spdk_filesystem *fs)
{
struct spdk_file *f;
f = TAILQ_FIRST(&fs->files);
return f;
}
spdk_fs_iter
spdk_fs_iter_next(spdk_fs_iter iter)
{
struct spdk_file *f = iter;
if (f == NULL) {
return NULL;
}
f = TAILQ_NEXT(f, tailq);
return f;
}
const char *
spdk_file_get_name(struct spdk_file *file)
{
return file->name;
}
uint64_t
spdk_file_get_length(struct spdk_file *file)
{
uint64_t length;
assert(file != NULL);
length = file->append_pos >= file->length ? file->append_pos : file->length;
SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s length=0x%jx\n", file->name, length);
return length;
}
static void
fs_truncate_complete_cb(void *ctx, int bserrno)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
args->fn.file_op(args->arg, bserrno);
free_fs_request(req);
}
static void
fs_truncate_resize_cb(void *ctx, int bserrno)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
struct spdk_file *file = args->file;
uint64_t *length = &args->op.truncate.length;
if (bserrno) {
args->fn.file_op(args->arg, bserrno);
free_fs_request(req);
return;
}
spdk_blob_set_xattr(file->blob, "length", length, sizeof(*length));
file->length = *length;
if (file->append_pos > file->length) {
file->append_pos = file->length;
}
spdk_blob_sync_md(file->blob, fs_truncate_complete_cb, req);
}
static uint64_t
__bytes_to_clusters(uint64_t length, uint64_t cluster_sz)
{
return (length + cluster_sz - 1) / cluster_sz;
}
void
spdk_file_truncate_async(struct spdk_file *file, uint64_t length,
spdk_file_op_complete cb_fn, void *cb_arg)
{
struct spdk_filesystem *fs;
size_t num_clusters;
struct spdk_fs_request *req;
struct spdk_fs_cb_args *args;
SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s old=0x%jx new=0x%jx\n", file->name, file->length, length);
if (length == file->length) {
cb_fn(cb_arg, 0);
return;
}
req = alloc_fs_request(file->fs->md_target.md_fs_channel);
if (req == NULL) {
cb_fn(cb_arg, -ENOMEM);
return;
}
args = &req->args;
args->fn.file_op = cb_fn;
args->arg = cb_arg;
args->file = file;
args->op.truncate.length = length;
fs = file->fs;
num_clusters = __bytes_to_clusters(length, fs->bs_opts.cluster_sz);
spdk_blob_resize(file->blob, num_clusters, fs_truncate_resize_cb, req);
}
static void
__truncate(void *arg)
{
struct spdk_fs_request *req = arg;
struct spdk_fs_cb_args *args = &req->args;
spdk_file_truncate_async(args->file, args->op.truncate.length,
args->fn.file_op, args);
}
int
spdk_file_truncate(struct spdk_file *file, struct spdk_fs_thread_ctx *ctx,
uint64_t length)
{
struct spdk_fs_channel *channel = (struct spdk_fs_channel *)ctx;
struct spdk_fs_request *req;
struct spdk_fs_cb_args *args;
int rc;
req = alloc_fs_request(channel);
if (req == NULL) {
return -ENOMEM;
}
args = &req->args;
args->file = file;
args->op.truncate.length = length;
args->fn.file_op = __wake_caller;
args->sem = &channel->sem;
channel->send_request(__truncate, req);
sem_wait(&channel->sem);
rc = args->rc;
free_fs_request(req);
return rc;
}
static void
__rw_done(void *ctx, int bserrno)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
spdk_free(args->op.rw.pin_buf);
args->fn.file_op(args->arg, bserrno);
free_fs_request(req);
}
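/*
 * The pinned bounce buffer now holds the on-disk data for the LBA range.
 * For a read, copy the requested bytes out to the caller's iov; for a
 * write, merge the caller's data into the buffer and write the whole
 * range back (read-modify-write).
 */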
static void
__read_done(void *ctx, int bserrno)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
assert(req != NULL);
if (args->op.rw.is_read) {
memcpy(args->iovs[0].iov_base,
args->op.rw.pin_buf + (args->op.rw.offset & (args->op.rw.blocklen - 1)),
args->iovs[0].iov_len);
__rw_done(req, 0);
} else {
memcpy(args->op.rw.pin_buf + (args->op.rw.offset & (args->op.rw.blocklen - 1)),
args->iovs[0].iov_base,
args->iovs[0].iov_len);
spdk_blob_io_write(args->file->blob, args->op.rw.channel,
args->op.rw.pin_buf,
args->op.rw.start_lba, args->op.rw.num_lba,
__rw_done, req);
}
}
static void
__do_blob_read(void *ctx, int fserrno)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
if (fserrno) {
__rw_done(req, fserrno);
return;
}
spdk_blob_io_read(args->file->blob, args->op.rw.channel,
args->op.rw.pin_buf,
args->op.rw.start_lba, args->op.rw.num_lba,
__read_done, req);
}
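/*
 * Translate a byte range [offset, offset + length) in the file into the
 * blobstore LBA range that covers it, using the blobstore io unit size as
 * the LBA size.
 */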
static void
__get_page_parameters(struct spdk_file *file, uint64_t offset, uint64_t length,
uint64_t *start_lba, uint32_t *lba_size, uint64_t *num_lba)
{
uint64_t end_lba;
*lba_size = spdk_bs_get_io_unit_size(file->fs->bs);
*start_lba = offset / *lba_size;
end_lba = (offset + length - 1) / *lba_size;
*num_lba = (end_lba - *start_lba + 1);
}
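/*
 * Common path for uncached reads and writes.  A DMA-able bounce buffer
 * covering the full LBA range is allocated and the range is read from the
 * blob; the data is then either copied out to the caller (read) or merged
 * in and written back (write).
 */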
static void
__readwrite(struct spdk_file *file, struct spdk_io_channel *_channel,
void *payload, uint64_t offset, uint64_t length,
spdk_file_op_complete cb_fn, void *cb_arg, int is_read)
{
struct spdk_fs_request *req;
struct spdk_fs_cb_args *args;
struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel);
uint64_t start_lba, num_lba, pin_buf_length;
uint32_t lba_size;
if (is_read && offset + length > file->length) {
cb_fn(cb_arg, -EINVAL);
return;
}
req = alloc_fs_request_with_iov(channel, 1);
if (req == NULL) {
cb_fn(cb_arg, -ENOMEM);
return;
}
__get_page_parameters(file, offset, length, &start_lba, &lba_size, &num_lba);
args = &req->args;
args->fn.file_op = cb_fn;
args->arg = cb_arg;
args->file = file;
args->op.rw.channel = channel->bs_channel;
args->iovs[0].iov_base = payload;
args->iovs[0].iov_len = (size_t)length;
args->op.rw.is_read = is_read;
args->op.rw.offset = offset;
args->op.rw.blocklen = lba_size;
pin_buf_length = num_lba * lba_size;
args->op.rw.pin_buf = spdk_malloc(pin_buf_length, lba_size, NULL,
SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
if (args->op.rw.pin_buf == NULL) {
SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "Failed to allocate buf for: file=%s offset=%jx length=%jx\n",
file->name, offset, length);
free_fs_request(req);
cb_fn(cb_arg, -ENOMEM);
return;
}
args->op.rw.start_lba = start_lba;
args->op.rw.num_lba = num_lba;
if (!is_read && file->length < offset + length) {
spdk_file_truncate_async(file, offset + length, __do_blob_read, req);
} else {
__do_blob_read(req, 0);
}
}
void
spdk_file_write_async(struct spdk_file *file, struct spdk_io_channel *channel,
void *payload, uint64_t offset, uint64_t length,
spdk_file_op_complete cb_fn, void *cb_arg)
{
__readwrite(file, channel, payload, offset, length, cb_fn, cb_arg, 0);
}
void
spdk_file_read_async(struct spdk_file *file, struct spdk_io_channel *channel,
void *payload, uint64_t offset, uint64_t length,
spdk_file_op_complete cb_fn, void *cb_arg)
{
SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s offset=%jx length=%jx\n",
file->name, offset, length);
__readwrite(file, channel, payload, offset, length, cb_fn, cb_arg, 1);
}
struct spdk_io_channel *
spdk_fs_alloc_io_channel(struct spdk_filesystem *fs)
{
struct spdk_io_channel *io_channel;
struct spdk_fs_channel *fs_channel;
io_channel = spdk_get_io_channel(&fs->io_target);
fs_channel = spdk_io_channel_get_ctx(io_channel);
fs_channel->bs_channel = spdk_bs_alloc_io_channel(fs->bs);
fs_channel->send_request = __send_request_direct;
return io_channel;
}
void
spdk_fs_free_io_channel(struct spdk_io_channel *channel)
{
spdk_put_io_channel(channel);
}
struct spdk_fs_thread_ctx *
spdk_fs_alloc_thread_ctx(struct spdk_filesystem *fs)
{
struct spdk_fs_thread_ctx *ctx;
ctx = calloc(1, sizeof(*ctx));
if (!ctx) {
return NULL;
}
_spdk_fs_channel_create(fs, &ctx->ch, 512);
ctx->ch.send_request = fs->send_request;
ctx->ch.sync = 1;
pthread_spin_init(&ctx->ch.lock, 0);
return ctx;
}
void
spdk_fs_free_thread_ctx(struct spdk_fs_thread_ctx *ctx)
{
assert(ctx->ch.sync == 1);
while (true) {
pthread_spin_lock(&ctx->ch.lock);
if (ctx->ch.outstanding_reqs == 0) {
pthread_spin_unlock(&ctx->ch.lock);
break;
}
pthread_spin_unlock(&ctx->ch.lock);
usleep(1000);
}
_spdk_fs_channel_destroy(NULL, &ctx->ch);
free(ctx);
}
void
spdk_fs_set_cache_size(uint64_t size_in_mb)
{
g_fs_cache_size = size_in_mb * 1024 * 1024;
}
uint64_t
spdk_fs_get_cache_size(void)
{
return g_fs_cache_size / (1024 * 1024);
}
static void __file_flush(void *ctx);
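/*
 * Get a buffer from the global cache mempool.  If the pool is exhausted,
 * evict cached buffers from another file, chosen in three passes of
 * decreasing strictness (a low-priority file not open for writing, then any
 * file not open for writing, then any other file), retrying the pool after
 * each eviction.
 */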
static void *
alloc_cache_memory_buffer(struct spdk_file *context)
{
struct spdk_file *file;
void *buf;
buf = spdk_mempool_get(g_cache_pool);
if (buf != NULL) {
return buf;
}
pthread_spin_lock(&g_caches_lock);
TAILQ_FOREACH(file, &g_caches, cache_tailq) {
if (!file->open_for_writing &&
file->priority == SPDK_FILE_PRIORITY_LOW &&
file != context) {
break;
}
}
pthread_spin_unlock(&g_caches_lock);
if (file != NULL) {
cache_free_buffers(file);
buf = spdk_mempool_get(g_cache_pool);
if (buf != NULL) {
return buf;
}
}
pthread_spin_lock(&g_caches_lock);
TAILQ_FOREACH(file, &g_caches, cache_tailq) {
if (!file->open_for_writing && file != context) {
break;
}
}
pthread_spin_unlock(&g_caches_lock);
if (file != NULL) {
cache_free_buffers(file);
buf = spdk_mempool_get(g_cache_pool);
if (buf != NULL) {
return buf;
}
}
pthread_spin_lock(&g_caches_lock);
TAILQ_FOREACH(file, &g_caches, cache_tailq) {
if (file != context) {
break;
}
}
pthread_spin_unlock(&g_caches_lock);
if (file != NULL) {
cache_free_buffers(file);
buf = spdk_mempool_get(g_cache_pool);
if (buf != NULL) {
return buf;
}
}
return NULL;
}
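/*
 * Allocate a new cache buffer for the given file offset and insert it into
 * the file's buffer tree.  The file is added to the global cache list the
 * first time it gets a cached buffer.
 */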
static struct cache_buffer *
cache_insert_buffer(struct spdk_file *file, uint64_t offset)
{
struct cache_buffer *buf;
int count = 0;
buf = calloc(1, sizeof(*buf));
if (buf == NULL) {
SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "calloc failed\n");
return NULL;
}
buf->buf = alloc_cache_memory_buffer(file);
while (buf->buf == NULL) {
/*
* TODO: alloc_cache_memory_buffer() should eventually free
* some buffers. Need a more sophisticated check here, instead
* of just bailing if 100 tries does not result in getting a
* free buffer. This will involve using the sync channel's
* semaphore to block until a buffer becomes available.
*/
if (count++ == 100) {
SPDK_ERRLOG("Could not allocate cache buffer for file=%p on offset=%jx\n",
file, offset);
free(buf);
return NULL;
}
buf->buf = alloc_cache_memory_buffer(file);
}
buf->buf_size = CACHE_BUFFER_SIZE;
buf->offset = offset;
pthread_spin_lock(&g_caches_lock);
if (file->tree->present_mask == 0) {
TAILQ_INSERT_TAIL(&g_caches, file, cache_tailq);
}
file->tree = spdk_tree_insert_buffer(file->tree, buf);
pthread_spin_unlock(&g_caches_lock);
return buf;
}
static struct cache_buffer *
cache_append_buffer(struct spdk_file *file)
{
struct cache_buffer *last;
assert(file->last == NULL || file->last->bytes_filled == file->last->buf_size);
assert((file->append_pos % CACHE_BUFFER_SIZE) == 0);
last = cache_insert_buffer(file, file->append_pos);
if (last == NULL) {
SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "cache_insert_buffer failed\n");
return NULL;
}
file->last = last;
return last;
}
static void __check_sync_reqs(struct spdk_file *file);
static void
__file_cache_finish_sync(void *ctx, int bserrno)
{
struct spdk_file *file;
struct spdk_fs_request *sync_req = ctx;
struct spdk_fs_cb_args *sync_args;
sync_args = &sync_req->args;
file = sync_args->file;
pthread_spin_lock(&file->lock);
file->length_xattr = sync_args->op.sync.length;
assert(sync_args->op.sync.offset <= file->length_flushed);
spdk_trace_record(TRACE_BLOBFS_XATTR_END, 0, sync_args->op.sync.offset,
0, file->trace_arg_name);
BLOBFS_TRACE(file, "sync done offset=%jx\n", sync_args->op.sync.offset);
TAILQ_REMOVE(&file->sync_requests, sync_req, args.op.sync.tailq);
pthread_spin_unlock(&file->lock);
sync_args->fn.file_op(sync_args->arg, bserrno);
pthread_spin_lock(&file->lock);
free_fs_request(sync_req);
pthread_spin_unlock(&file->lock);
__check_sync_reqs(file);
}
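/*
 * If any queued sync request is now fully covered by flushed data
 * (offset <= length_flushed), persist the flushed length to the blob's
 * "length" xattr and sync the metadata; __file_cache_finish_sync()
 * completes the request.
 */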
static void
__check_sync_reqs(struct spdk_file *file)
{
struct spdk_fs_request *sync_req;
pthread_spin_lock(&file->lock);
TAILQ_FOREACH(sync_req, &file->sync_requests, args.op.sync.tailq) {
if (sync_req->args.op.sync.offset <= file->length_flushed) {
break;
}
}
if (sync_req != NULL && !sync_req->args.op.sync.xattr_in_progress) {
BLOBFS_TRACE(file, "set xattr length 0x%jx\n", file->length_flushed);
sync_req->args.op.sync.xattr_in_progress = true;
sync_req->args.op.sync.length = file->length_flushed;
spdk_blob_set_xattr(file->blob, "length", &file->length_flushed,
sizeof(file->length_flushed));
pthread_spin_unlock(&file->lock);
spdk_trace_record(TRACE_BLOBFS_XATTR_START, 0, file->length_flushed,
0, file->trace_arg_name);
spdk_blob_sync_md(file->blob, __file_cache_finish_sync, sync_req);
} else {
pthread_spin_unlock(&file->lock);
}
}
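/*
 * Completion callback for a cache buffer write: account for the bytes just
 * flushed, advance length_flushed, then check for satisfied sync requests
 * and try to flush more data.
 */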
static void
__file_flush_done(void *ctx, int bserrno)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
struct spdk_file *file = args->file;
struct cache_buffer *next = args->op.flush.cache_buffer;
BLOBFS_TRACE(file, "length=%jx\n", args->op.flush.length);
pthread_spin_lock(&file->lock);
next->in_progress = false;
next->bytes_flushed += args->op.flush.length;
file->length_flushed += args->op.flush.length;
if (file->length_flushed > file->length) {
file->length = file->length_flushed;
}
if (next->bytes_flushed == next->buf_size) {
BLOBFS_TRACE(file, "write buffer fully flushed 0x%jx\n", file->length_flushed);
next = spdk_tree_find_buffer(file->tree, file->length_flushed);
}
/*
* Assert that there is no cached data that extends past the end of the underlying
* blob.
*/
assert(next == NULL || next->offset < __file_get_blob_size(file) ||
next->bytes_filled == 0);
pthread_spin_unlock(&file->lock);
__check_sync_reqs(file);
__file_flush(req);
}
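/*
 * Flush the next unwritten cache data starting at length_flushed.  A
 * partially filled buffer is only flushed when a sync request is
 * outstanding; otherwise we wait for the buffer to fill.
 */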
static void
__file_flush(void *ctx)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
struct spdk_file *file = args->file;
struct cache_buffer *next;
uint64_t offset, length, start_lba, num_lba;
uint32_t lba_size;
pthread_spin_lock(&file->lock);
next = spdk_tree_find_buffer(file->tree, file->length_flushed);
if (next == NULL || next->in_progress ||
((next->bytes_filled < next->buf_size) && TAILQ_EMPTY(&file->sync_requests))) {
/*
* There is either no data to flush, a flush I/O is already in
* progress, or the next buffer is partially filled but there's no
* outstanding request to sync it.
* So return immediately - if a flush I/O is in progress we will flush
* more data after that is completed, or a partial buffer will get flushed
* when it is either filled or the file is synced.
*/
free_fs_request(req);
if (next == NULL) {
/*
* For cases where a file's cache was evicted, and then the
* file was later appended, we will write the data directly
* to disk and bypass cache. So just update length_flushed
* here to reflect that all data was already written to disk.
*/
file->length_flushed = file->append_pos;
}
pthread_spin_unlock(&file->lock);
if (next == NULL) {
/*
* There is no data to flush, but we still need to check for any
* outstanding sync requests to make sure metadata gets updated.
*/
__check_sync_reqs(file);
}
return;
}
offset = next->offset + next->bytes_flushed;
length = next->bytes_filled - next->bytes_flushed;
if (length == 0) {
free_fs_request(req);
pthread_spin_unlock(&file->lock);
/*
* There is no data to flush, but we still need to check for any
* outstanding sync requests to make sure metadata gets updated.
*/
__check_sync_reqs(file);
return;
}
args->op.flush.length = length;
args->op.flush.cache_buffer = next;
__get_page_parameters(file, offset, length, &start_lba, &lba_size, &num_lba);
next->in_progress = true;
BLOBFS_TRACE(file, "offset=%jx length=%jx page start=%jx num=%jx\n",
offset, length, start_lba, num_lba);
pthread_spin_unlock(&file->lock);
spdk_blob_io_write(file->blob, file->fs->sync_target.sync_fs_channel->bs_channel,
next->buf + (start_lba * lba_size) - next->offset,
start_lba, num_lba, __file_flush_done, req);
}
static void
__file_extend_done(void *arg, int bserrno)
{
struct spdk_fs_cb_args *args = arg;
__wake_caller(args, bserrno);
}
static void
__file_extend_resize_cb(void *_args, int bserrno)
{
struct spdk_fs_cb_args *args = _args;
struct spdk_file *file = args->file;
if (bserrno) {
__wake_caller(args, bserrno);
return;
}
spdk_blob_sync_md(file->blob, __file_extend_done, args);
}
static void
__file_extend_blob(void *_args)
{
struct spdk_fs_cb_args *args = _args;
struct spdk_file *file = args->file;
spdk_blob_resize(file->blob, args->op.resize.num_clusters, __file_extend_resize_cb, args);
}
static void
__rw_from_file_done(void *ctx, int bserrno)
{
struct spdk_fs_request *req = ctx;
__wake_caller(&req->args, bserrno);
free_fs_request(req);
}
static void
__rw_from_file(void *ctx)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
struct spdk_file *file = args->file;
if (args->op.rw.is_read) {
spdk_file_read_async(file, file->fs->sync_target.sync_io_channel, args->iovs[0].iov_base,
args->op.rw.offset, (uint64_t)args->iovs[0].iov_len,
__rw_from_file_done, req);
} else {
spdk_file_write_async(file, file->fs->sync_target.sync_io_channel, args->iovs[0].iov_base,
args->op.rw.offset, (uint64_t)args->iovs[0].iov_len,
__rw_from_file_done, req);
}
}
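/*
 * Dispatch an uncached read or write to the filesystem's owning thread via
 * send_request.  The caller waits on the channel semaphore, which is posted
 * by __rw_from_file_done() on completion (or here, if no request could be
 * allocated).
 */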
static int
__send_rw_from_file(struct spdk_file *file, void *payload,
uint64_t offset, uint64_t length, bool is_read,
struct spdk_fs_channel *channel)
{
struct spdk_fs_request *req;
struct spdk_fs_cb_args *args;
req = alloc_fs_request_with_iov(channel, 1);
if (req == NULL) {
sem_post(&channel->sem);
return -ENOMEM;
}
args = &req->args;
args->file = file;
args->sem = &channel->sem;
args->iovs[0].iov_base = payload;
args->iovs[0].iov_len = (size_t)length;
args->op.rw.offset = offset;
args->op.rw.is_read = is_read;
file->fs->send_request(__rw_from_file, req);
return 0;
}
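/*
 * Write (append-only) path: data is copied into cache buffers and flushed
 * asynchronously once a buffer fills.  If the file's cache was evicted, the
 * data is written directly to disk instead.  The underlying blob is resized
 * first when the write extends past its current size.
 */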
int
spdk_file_write(struct spdk_file *file, struct spdk_fs_thread_ctx *ctx,
void *payload, uint64_t offset, uint64_t length)
{
struct spdk_fs_channel *channel = (struct spdk_fs_channel *)ctx;
struct spdk_fs_request *flush_req;
uint64_t rem_length, copy, blob_size, cluster_sz;
uint32_t cache_buffers_filled = 0;
uint8_t *cur_payload;
struct cache_buffer *last;
BLOBFS_TRACE_RW(file, "offset=%jx length=%jx\n", offset, length);
if (length == 0) {
return 0;
}
if (offset != file->append_pos) {
BLOBFS_TRACE(file, " error offset=%jx append_pos=%jx\n", offset, file->append_pos);
return -EINVAL;
}
pthread_spin_lock(&file->lock);
file->open_for_writing = true;
if ((file->last == NULL) && (file->append_pos % CACHE_BUFFER_SIZE == 0)) {
cache_append_buffer(file);
}
if (file->last == NULL) {
int rc;
file->append_pos += length;
pthread_spin_unlock(&file->lock);
rc = __send_rw_from_file(file, payload, offset, length, false, channel);
sem_wait(&channel->sem);
return rc;
}
blob_size = __file_get_blob_size(file);
if ((offset + length) > blob_size) {
struct spdk_fs_cb_args extend_args = {};
cluster_sz = file->fs->bs_opts.cluster_sz;
extend_args.sem = &channel->sem;
extend_args.op.resize.num_clusters = __bytes_to_clusters((offset + length), cluster_sz);
extend_args.file = file;
BLOBFS_TRACE(file, "start resize to %u clusters\n", extend_args.op.resize.num_clusters);
pthread_spin_unlock(&file->lock);
file->fs->send_request(__file_extend_blob, &extend_args);
sem_wait(&channel->sem);
if (extend_args.rc) {
return extend_args.rc;
}
}
flush_req = alloc_fs_request(channel);
if (flush_req == NULL) {
pthread_spin_unlock(&file->lock);
return -ENOMEM;
}
last = file->last;
rem_length = length;
cur_payload = payload;
while (rem_length > 0) {
copy = last->buf_size - last->bytes_filled;
if (copy > rem_length) {
copy = rem_length;
}
BLOBFS_TRACE_RW(file, " fill offset=%jx length=%jx\n", file->append_pos, copy);
memcpy(&last->buf[last->bytes_filled], cur_payload, copy);
file->append_pos += copy;
if (file->length < file->append_pos) {
file->length = file->append_pos;
}
cur_payload += copy;
last->bytes_filled += copy;
rem_length -= copy;
if (last->bytes_filled == last->buf_size) {
cache_buffers_filled++;
last = cache_append_buffer(file);
if (last == NULL) {
BLOBFS_TRACE(file, "nomem\n");
free_fs_request(flush_req);
pthread_spin_unlock(&file->lock);
return -ENOMEM;
}
}
}
pthread_spin_unlock(&file->lock);
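	/*
	 * Only kick off a flush if at least one cache buffer was completely
	 * filled.  A trailing partial buffer is not flushed here; it will be
	 * written out once it fills up or when the file is explicitly synced.
	 */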
if (cache_buffers_filled == 0) {
free_fs_request(flush_req);
return 0;
}
flush_req->args.file = file;
file->fs->send_request(__file_flush, flush_req);
return 0;
}
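/*
 * Completion callback for a read-ahead I/O: mark the cache buffer as filled
 * and already flushed (it mirrors on-disk data) and clear its in-progress
 * flag.
 */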
static void
__readahead_done(void *ctx, int bserrno)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
struct cache_buffer *cache_buffer = args->op.readahead.cache_buffer;
struct spdk_file *file = args->file;
BLOBFS_TRACE(file, "offset=%jx\n", cache_buffer->offset);
pthread_spin_lock(&file->lock);
cache_buffer->bytes_filled = args->op.readahead.length;
cache_buffer->bytes_flushed = args->op.readahead.length;
cache_buffer->in_progress = false;
pthread_spin_unlock(&file->lock);
free_fs_request(req);
}
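/*
 * Issue the blob read that populates a read-ahead cache buffer, using the
 * filesystem's sync blobstore channel.  Runs on the thread targeted by
 * fs->send_request().
 */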
static void
__readahead(void *ctx)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
struct spdk_file *file = args->file;
uint64_t offset, length, start_lba, num_lba;
uint32_t lba_size;
offset = args->op.readahead.offset;
length = args->op.readahead.length;
assert(length > 0);
__get_page_parameters(file, offset, length, &start_lba, &lba_size, &num_lba);
BLOBFS_TRACE(file, "offset=%jx length=%jx page start=%jx num=%jx\n",
offset, length, start_lba, num_lba);
spdk_blob_io_read(file->blob, file->fs->sync_target.sync_fs_channel->bs_channel,
args->op.readahead.cache_buffer->buf,
start_lba, num_lba, __readahead_done, req);
}
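/* Return the offset of the cache buffer that follows the one containing @offset. */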
static uint64_t
__next_cache_buffer_offset(uint64_t offset)
{
return (offset + CACHE_BUFFER_SIZE) & ~(CACHE_TREE_LEVEL_MASK(0));
}
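/*
 * Start a read-ahead into the cache buffer that follows @offset, unless that
 * buffer is already cached, lies beyond end-of-file, or no request/buffer can
 * be allocated.
 */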
static void
check_readahead(struct spdk_file *file, uint64_t offset,
struct spdk_fs_channel *channel)
{
struct spdk_fs_request *req;
struct spdk_fs_cb_args *args;
offset = __next_cache_buffer_offset(offset);
if (spdk_tree_find_buffer(file->tree, offset) != NULL || file->length <= offset) {
return;
}
req = alloc_fs_request(channel);
if (req == NULL) {
return;
}
args = &req->args;
BLOBFS_TRACE(file, "offset=%jx\n", offset);
args->file = file;
args->op.readahead.offset = offset;
args->op.readahead.cache_buffer = cache_insert_buffer(file, offset);
if (!args->op.readahead.cache_buffer) {
BLOBFS_TRACE(file, "Cannot allocate buf for offset=%jx\n", offset);
free_fs_request(req);
return;
}
args->op.readahead.cache_buffer->in_progress = true;
if (file->length < (offset + CACHE_BUFFER_SIZE)) {
args->op.readahead.length = file->length & (CACHE_BUFFER_SIZE - 1);
} else {
args->op.readahead.length = CACHE_BUFFER_SIZE;
}
file->fs->send_request(__readahead, req);
}
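/*
 * Satisfy a read of at most one cache buffer from the cache if possible,
 * otherwise fall back to an uncached read.  Called (and returns) with
 * file->lock held; posts the channel semaphore itself on the cached path.
 */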
static int
__file_read(struct spdk_file *file, void *payload, uint64_t offset, uint64_t length,
struct spdk_fs_channel *channel)
{
struct cache_buffer *buf;
int rc;
buf = spdk_tree_find_filled_buffer(file->tree, offset);
if (buf == NULL) {
pthread_spin_unlock(&file->lock);
rc = __send_rw_from_file(file, payload, offset, length, true, channel);
pthread_spin_lock(&file->lock);
return rc;
}
if ((offset + length) > (buf->offset + buf->bytes_filled)) {
length = buf->offset + buf->bytes_filled - offset;
}
BLOBFS_TRACE(file, "read %p offset=%ju length=%ju\n", payload, offset, length);
memcpy(payload, &buf->buf[offset - buf->offset], length);
if ((offset + length) % CACHE_BUFFER_SIZE == 0) {
pthread_spin_lock(&g_caches_lock);
spdk_tree_remove_buffer(file->tree, buf);
if (file->tree->present_mask == 0) {
TAILQ_REMOVE(&g_caches, file, cache_tailq);
}
pthread_spin_unlock(&g_caches_lock);
}
sem_post(&channel->sem);
return 0;
}
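/*
 * Synchronous read.  The request is clipped to the current append position
 * and split on cache-buffer boundaries; sequential access past the read-ahead
 * threshold triggers read-ahead of the following buffers.  Returns the number
 * of bytes read, or a negative errno.
 */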
int64_t
spdk_file_read(struct spdk_file *file, struct spdk_fs_thread_ctx *ctx,
void *payload, uint64_t offset, uint64_t length)
{
struct spdk_fs_channel *channel = (struct spdk_fs_channel *)ctx;
uint64_t final_offset, final_length;
uint32_t sub_reads = 0;
int rc = 0;
pthread_spin_lock(&file->lock);
BLOBFS_TRACE_RW(file, "offset=%ju length=%ju\n", offset, length);
file->open_for_writing = false;
if (length == 0 || offset >= file->append_pos) {
pthread_spin_unlock(&file->lock);
return 0;
}
if (offset + length > file->append_pos) {
length = file->append_pos - offset;
}
if (offset != file->next_seq_offset) {
file->seq_byte_count = 0;
}
file->seq_byte_count += length;
file->next_seq_offset = offset + length;
if (file->seq_byte_count >= CACHE_READAHEAD_THRESHOLD) {
check_readahead(file, offset, channel);
check_readahead(file, offset + CACHE_BUFFER_SIZE, channel);
}
final_length = 0;
final_offset = offset + length;
while (offset < final_offset) {
length = NEXT_CACHE_BUFFER_OFFSET(offset) - offset;
if (length > (final_offset - offset)) {
length = final_offset - offset;
}
		/*
		 * Count this sub-read before issuing it so the sem_wait() loop
		 * below matches every sem_post(), including the one that
		 * __send_rw_from_file() performs when request allocation fails.
		 */
		sub_reads++;
		rc = __file_read(file, payload, offset, length, channel);
		if (rc == 0) {
			final_length += length;
		} else {
			break;
		}
		payload += length;
		offset += length;
}
pthread_spin_unlock(&file->lock);
while (sub_reads-- > 0) {
sem_wait(&channel->sem);
}
if (rc == 0) {
return final_length;
} else {
return rc;
}
}
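/*
 * Queue a sync request that completes once everything up to the current
 * append position has been flushed and recorded in the length xattr, then
 * kick __file_flush() to start pushing out the cached data.  If the file is
 * already synced up to append_pos, the callback fires immediately.
 */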
static void
_file_sync(struct spdk_file *file, struct spdk_fs_channel *channel,
spdk_file_op_complete cb_fn, void *cb_arg)
{
struct spdk_fs_request *sync_req;
struct spdk_fs_request *flush_req;
struct spdk_fs_cb_args *sync_args;
struct spdk_fs_cb_args *flush_args;
BLOBFS_TRACE(file, "offset=%jx\n", file->append_pos);
pthread_spin_lock(&file->lock);
if (file->append_pos <= file->length_xattr) {
BLOBFS_TRACE(file, "done - file already synced\n");
pthread_spin_unlock(&file->lock);
cb_fn(cb_arg, 0);
return;
}
sync_req = alloc_fs_request(channel);
if (!sync_req) {
pthread_spin_unlock(&file->lock);
cb_fn(cb_arg, -ENOMEM);
return;
}
sync_args = &sync_req->args;
	flush_req = alloc_fs_request(channel);
	if (!flush_req) {
		/* Don't leak the sync request allocated above. */
		free_fs_request(sync_req);
		pthread_spin_unlock(&file->lock);
		cb_fn(cb_arg, -ENOMEM);
		return;
	}
flush_args = &flush_req->args;
sync_args->file = file;
sync_args->fn.file_op = cb_fn;
sync_args->arg = cb_arg;
sync_args->op.sync.offset = file->append_pos;
sync_args->op.sync.xattr_in_progress = false;
TAILQ_INSERT_TAIL(&file->sync_requests, sync_req, args.op.sync.tailq);
pthread_spin_unlock(&file->lock);
flush_args->file = file;
channel->send_request(__file_flush, flush_req);
}
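/*
 * Synchronous wrapper around _file_sync(): block on the channel semaphore
 * until the sync completes and return its status.
 */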
int
spdk_file_sync(struct spdk_file *file, struct spdk_fs_thread_ctx *ctx)
{
struct spdk_fs_channel *channel = (struct spdk_fs_channel *)ctx;
struct spdk_fs_cb_args args = {};
args.sem = &channel->sem;
_file_sync(file, channel, __wake_caller, &args);
sem_wait(&channel->sem);
return args.rc;
}
void
spdk_file_sync_async(struct spdk_file *file, struct spdk_io_channel *_channel,
spdk_file_op_complete cb_fn, void *cb_arg)
{
struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel);
_file_sync(file, channel, cb_fn, cb_arg);
}
void
spdk_file_set_priority(struct spdk_file *file, uint32_t priority)
{
BLOBFS_TRACE(file, "priority=%u\n", priority);
file->priority = priority;
}
/*
* Close routines
*/
static void
__file_close_async_done(void *ctx, int bserrno)
{
struct spdk_fs_request *req = ctx;
struct spdk_fs_cb_args *args = &req->args;
struct spdk_file *file = args->file;
spdk_trace_record(TRACE_BLOBFS_CLOSE, 0, 0, 0, file->trace_arg_name);
if (file->is_deleted) {
spdk_fs_delete_file_async(file->fs, file->name, blob_delete_cb, ctx);
return;
}
args->fn.file_op(args->arg, bserrno);
free_fs_request(req);
}
static void
__file_close_async(struct spdk_file *file, struct spdk_fs_request *req)
{
struct spdk_blob *blob;
pthread_spin_lock(&file->lock);
if (file->ref_count == 0) {
pthread_spin_unlock(&file->lock);
__file_close_async_done(req, -EBADF);
return;
}
file->ref_count--;
if (file->ref_count > 0) {
pthread_spin_unlock(&file->lock);
req->args.fn.file_op(req->args.arg, 0);
free_fs_request(req);
return;
}
pthread_spin_unlock(&file->lock);
blob = file->blob;
file->blob = NULL;
spdk_blob_close(blob, __file_close_async_done, req);
}
static void
__file_close_async__sync_done(void *arg, int fserrno)
{
struct spdk_fs_request *req = arg;
struct spdk_fs_cb_args *args = &req->args;
__file_close_async(args->file, req);
}
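/*
 * Asynchronous close: sync the file on the metadata channel first, then drop
 * a reference and close the blob once the last reference is gone.
 */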
void
spdk_file_close_async(struct spdk_file *file, spdk_file_op_complete cb_fn, void *cb_arg)
{
struct spdk_fs_request *req;
struct spdk_fs_cb_args *args;
req = alloc_fs_request(file->fs->md_target.md_fs_channel);
if (req == NULL) {
cb_fn(cb_arg, -ENOMEM);
return;
}
args = &req->args;
args->file = file;
args->fn.file_op = cb_fn;
args->arg = cb_arg;
spdk_file_sync_async(file, file->fs->md_target.md_io_channel, __file_close_async__sync_done, req);
}
static void
__file_close(void *arg)
{
struct spdk_fs_request *req = arg;
struct spdk_fs_cb_args *args = &req->args;
struct spdk_file *file = args->file;
__file_close_async(file, req);
}
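/*
 * Synchronous close: sync the file, then send the close to the request thread
 * and wait for it on the channel semaphore.
 */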
int
spdk_file_close(struct spdk_file *file, struct spdk_fs_thread_ctx *ctx)
{
struct spdk_fs_channel *channel = (struct spdk_fs_channel *)ctx;
struct spdk_fs_request *req;
struct spdk_fs_cb_args *args;
req = alloc_fs_request(channel);
if (req == NULL) {
return -ENOMEM;
}
args = &req->args;
spdk_file_sync(file, ctx);
BLOBFS_TRACE(file, "name=%s\n", file->name);
args->file = file;
args->sem = &channel->sem;
args->fn.file_op = __wake_caller;
args->arg = args;
channel->send_request(__file_close, req);
sem_wait(&channel->sem);
return args->rc;
}
int
spdk_file_get_id(struct spdk_file *file, void *id, size_t size)
{
if (size < sizeof(spdk_blob_id)) {
return -EINVAL;
}
memcpy(id, &file->blobid, sizeof(spdk_blob_id));
return sizeof(spdk_blob_id);
}
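/*
 * Release as many of the file's cache buffers as possible.  Buffers that
 * cannot be freed yet keep the file on the global cache list (re-inserted at
 * the tail); otherwise the file is removed from the list.
 */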
static void
cache_free_buffers(struct spdk_file *file)
{
BLOBFS_TRACE(file, "free=%s\n", file->name);
pthread_spin_lock(&file->lock);
pthread_spin_lock(&g_caches_lock);
if (file->tree->present_mask == 0) {
pthread_spin_unlock(&g_caches_lock);
pthread_spin_unlock(&file->lock);
return;
}
spdk_tree_free_buffers(file->tree);
TAILQ_REMOVE(&g_caches, file, cache_tailq);
	/*
	 * If some buffers could not be freed (e.g. a read-ahead is still in
	 * progress or data has not been fully flushed), keep the file on the
	 * global cache list by re-inserting it at the tail.
	 */
if (file->tree->present_mask != 0) {
TAILQ_INSERT_TAIL(&g_caches, file, cache_tailq);
}
file->last = NULL;
pthread_spin_unlock(&g_caches_lock);
pthread_spin_unlock(&file->lock);
}
SPDK_LOG_REGISTER_COMPONENT("blobfs", SPDK_LOG_BLOBFS)
SPDK_LOG_REGISTER_COMPONENT("blobfs_rw", SPDK_LOG_BLOBFS_RW)