Jim Harris 179ed697b3 blob: add readv/writev support
Most of the work here revolves around having to split
an I/O that spans a cluster boundary.  In this case
we need to allocate a separate iov array, and then
issue each sub-I/O serially, copying the relevant
subset of the original iov array.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I0d46b3f832245900d109ee6c78cc6d49cf96428b

Reviewed-on: https://review.gerrithub.io/374880
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
2017-08-22 18:19:00 -04:00

280 lines
12 KiB
C

/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* Blob Storage System
*
* The blob storage system, or the blobstore for short, is a low level
* library for placing opaque blobs of data onto a storage device such
* that scattered physical blocks on the storage device appear as a
* single, contiguous storage region. These blobs are also persistent,
* which means they are rediscoverable after reboot or power loss.
*
* The blobstore is designed to be very high performance, and thus has
* a few general rules regarding thread safety to avoid taking locks
* in the I/O path. Functions starting with the prefix "spdk_bs_md" must only
* be called from the metadata thread, of which there is only one at a time.
* The user application can declare which thread is the metadata thread by
* calling \ref spdk_bs_register_md_thread, but by default it is the thread
* that was used to create the blobstore initially. The metadata thread can
* be changed at run time by first unregistering
* (\ref spdk_bs_unregister_md_thread) and then re-registering. Registering
* a thread as the metadata thread is expensive and should be avoided.
*
* Functions starting with the prefix "spdk_bs_io" are passed a channel
* as an argument, and channels may only be used from the thread they were
* created on. See \ref spdk_bs_alloc_io_channel.
*
* Functions not starting with one of those two prefixes are thread safe
* and may be called from any thread at any time.
*
* The blob store returns errors using negated POSIX errno values, either
* returned in the callback or as a return value. An errno value of 0 means
* success.
*/
#ifndef SPDK_BLOB_H
#define SPDK_BLOB_H
#include "spdk/stdinc.h"
typedef uint64_t spdk_blob_id;
#define SPDK_BLOBID_INVALID (uint64_t)-1
struct spdk_blob_store;
struct spdk_io_channel;
struct spdk_blob;
struct spdk_xattr_names;
typedef void (*spdk_bs_op_complete)(void *cb_arg, int bserrno);
typedef void (*spdk_bs_op_with_handle_complete)(void *cb_arg, struct spdk_blob_store *bs,
int bserrno);
typedef void (*spdk_blob_op_complete)(void *cb_arg, int bserrno);
typedef void (*spdk_blob_op_with_id_complete)(void *cb_arg, spdk_blob_id blobid, int bserrno);
typedef void (*spdk_blob_op_with_handle_complete)(void *cb_arg, struct spdk_blob *blb, int bserrno);
/* Calls to function pointers of this type must obey all of the normal
rules for channels. The channel passed to this completion must match
the channel the operation was initiated on. */
typedef void (*spdk_bs_dev_cpl)(struct spdk_io_channel *channel,
void *cb_arg, int bserrno);
struct spdk_bs_dev_cb_args {
spdk_bs_dev_cpl cb_fn;
struct spdk_io_channel *channel;
void *cb_arg;
};
struct spdk_bs_dev {
/* Create a new channel which is a software construct that is used
* to submit I/O. */
struct spdk_io_channel *(*create_channel)(struct spdk_bs_dev *dev);
/* Destroy a previously created channel */
void (*destroy_channel)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel);
/* Destroy this blobstore device. Applications must not destroy the blobstore device,
* rather the blobstore will destroy it using this function pointer once all
* references to it during unload callback context have been completed.
*/
void (*destroy)(struct spdk_bs_dev *dev);
void (*read)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload,
uint64_t lba, uint32_t lba_count,
struct spdk_bs_dev_cb_args *cb_args);
void (*write)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload,
uint64_t lba, uint32_t lba_count,
struct spdk_bs_dev_cb_args *cb_args);
void (*readv)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
struct iovec *iov, int iovcnt,
uint64_t lba, uint32_t lba_count,
struct spdk_bs_dev_cb_args *cb_args);
void (*writev)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
struct iovec *iov, int iovcnt,
uint64_t lba, uint32_t lba_count,
struct spdk_bs_dev_cb_args *cb_args);
void (*flush)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
struct spdk_bs_dev_cb_args *cb_args);
void (*unmap)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
uint64_t lba, uint32_t lba_count,
struct spdk_bs_dev_cb_args *cb_args);
uint64_t blockcnt;
uint32_t blocklen; /* In bytes */
};
struct spdk_bs_opts {
uint32_t cluster_sz; /* In bytes. Must be multiple of page size. */
uint32_t num_md_pages; /* Count of the number of pages reserved for metadata */
uint32_t max_md_ops; /* Maximum simultaneous metadata operations */
uint32_t max_channel_ops; /* Maximum simultaneous operations per channel */
};
/* Initialize an spdk_bs_opts structure to the default blobstore option values. */
void spdk_bs_opts_init(struct spdk_bs_opts *opts);
/* Load a blob store from the given device. This will fail (return NULL) if no blob store is present. */
void spdk_bs_load(struct spdk_bs_dev *dev,
spdk_bs_op_with_handle_complete cb_fn, void *cb_arg);
/* Initialize a blob store on the given disk. Destroys all data present on the device. */
void spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *opts,
spdk_bs_op_with_handle_complete cb_fn, void *cb_arg);
/* Flush all volatile data to disk and destroy in-memory structures. */
void spdk_bs_unload(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn, void *cb_arg);
/* Set the given blob as the super blob. This will be retrievable immediately after an
* spdk_bs_load on the next initialization.
*/
void spdk_bs_set_super(struct spdk_blob_store *bs, spdk_blob_id blobid,
spdk_bs_op_complete cb_fn, void *cb_arg);
/* Open the super blob. */
void spdk_bs_get_super(struct spdk_blob_store *bs,
spdk_blob_op_with_id_complete cb_fn, void *cb_arg);
/* Get the cluster size in bytes. Used in the extend operation. */
uint64_t spdk_bs_get_cluster_size(struct spdk_blob_store *bs);
/* Get the page size in bytes. This is the write and read granularity of blobs. */
uint64_t spdk_bs_get_page_size(struct spdk_blob_store *bs);
/* Get the number of free clusters. */
uint64_t spdk_bs_free_cluster_count(struct spdk_blob_store *bs);
/* Register the current thread as the metadata thread. All functions beginning with
* the prefix "spdk_bs_md" must be called only from this thread.
*/
int spdk_bs_register_md_thread(struct spdk_blob_store *bs);
/* Unregister the current thread as the metadata thread. This allows a different
* thread to be registered.
*/
int spdk_bs_unregister_md_thread(struct spdk_blob_store *bs);
/* Return the blobid */
spdk_blob_id spdk_blob_get_id(struct spdk_blob *blob);
/* Return the number of pages allocated to the blob */
uint64_t spdk_blob_get_num_pages(struct spdk_blob *blob);
/* Return the number of clusters allocated to the blob */
uint64_t spdk_blob_get_num_clusters(struct spdk_blob *blob);
/* Create a new blob with initial size of 'sz' clusters. */
void spdk_bs_md_create_blob(struct spdk_blob_store *bs,
spdk_blob_op_with_id_complete cb_fn, void *cb_arg);
/* Delete an existing blob. */
void spdk_bs_md_delete_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
spdk_blob_op_complete cb_fn, void *cb_arg);
/* Open a blob */
void spdk_bs_md_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);
/* Resize a blob to 'sz' clusters.
*
* These changes are not persisted to disk until
* spdk_bs_md_sync_blob() is called. */
int spdk_bs_md_resize_blob(struct spdk_blob *blob, size_t sz);
/* Sync a blob */
/* Make a blob persistent. This applies to open, resize, set xattr,
* and remove xattr. These operations will not be persistent until
* the blob has been synced.
*
* I/O operations (read/write) are synced independently. See
* spdk_bs_io_flush_channel().
*/
void spdk_bs_md_sync_blob(struct spdk_blob *blob,
spdk_blob_op_complete cb_fn, void *cb_arg);
/* Close a blob. This will automatically sync. */
void spdk_bs_md_close_blob(struct spdk_blob **blob, spdk_blob_op_complete cb_fn, void *cb_arg);
struct spdk_io_channel *spdk_bs_alloc_io_channel(struct spdk_blob_store *bs);
void spdk_bs_free_io_channel(struct spdk_io_channel *channel);
/* Force all previously completed operations on this channel to be persistent. */
void spdk_bs_io_flush_channel(struct spdk_io_channel *channel,
spdk_blob_op_complete cb_fn, void *cb_arg);
/* Write data to a blob. Offset is in pages from the beginning of the blob. */
void spdk_bs_io_write_blob(struct spdk_blob *blob, struct spdk_io_channel *channel,
void *payload, uint64_t offset, uint64_t length,
spdk_blob_op_complete cb_fn, void *cb_arg);
/* Read data from a blob. Offset is in pages from the beginning of the blob. */
void spdk_bs_io_read_blob(struct spdk_blob *blob, struct spdk_io_channel *channel,
void *payload, uint64_t offset, uint64_t length,
spdk_blob_op_complete cb_fn, void *cb_arg);
/* Write data to a blob. Offset is in pages from the beginning of the blob. */
void spdk_bs_io_writev_blob(struct spdk_blob *blob, struct spdk_io_channel *channel,
struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
spdk_blob_op_complete cb_fn, void *cb_arg);
/* Read data from a blob. Offset is in pages from the beginning of the blob. */
void spdk_bs_io_readv_blob(struct spdk_blob *blob, struct spdk_io_channel *channel,
struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
spdk_blob_op_complete cb_fn, void *cb_arg);
/* Iterate through all blobs */
void spdk_bs_md_iter_first(struct spdk_blob_store *bs,
spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);
void spdk_bs_md_iter_next(struct spdk_blob_store *bs, struct spdk_blob **blob,
spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);
int spdk_blob_md_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
uint16_t value_len);
int spdk_blob_md_remove_xattr(struct spdk_blob *blob, const char *name);
int spdk_bs_md_get_xattr_value(struct spdk_blob *blob, const char *name,
const void **value, size_t *value_len);
int spdk_bs_md_get_xattr_names(struct spdk_blob *blob,
struct spdk_xattr_names **names);
uint32_t spdk_xattr_names_get_count(struct spdk_xattr_names *names);
const char *spdk_xattr_names_get_name(struct spdk_xattr_names *names, uint32_t index);
void spdk_xattr_names_free(struct spdk_xattr_names *names);
#endif /* SPDK_BLOB_H_ */