numam-spdk/lib/ftl/ftl_init.c
Wojciech Malikowski 3505d3a65c lib/ftl: Set l2p table initial state with memset()
This patch improves FTL initialization time.
For 16TB volume it reduces init/restore time
by about 30 seconds.

Change-Id: I76463c5447b983eb9428c0317933d06c40b2974e
Signed-off-by: Wojciech Malikowski <wojciech.malikowski@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/546
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Maciej Szczepaniak <maciej.szczepaniak@intel.com>
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
2020-02-20 09:51:28 +00:00

1398 lines
34 KiB
C

/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "spdk/stdinc.h"
#include "spdk/nvme.h"
#include "spdk/io_channel.h"
#include "spdk/string.h"
#include "spdk/likely.h"
#include "spdk_internal/log.h"
#include "spdk/ftl.h"
#include "spdk/likely.h"
#include "spdk/string.h"
#include "spdk/bdev_zone.h"
#include "spdk/bdev_module.h"
#include "ftl_core.h"
#include "ftl_io.h"
#include "ftl_reloc.h"
#include "ftl_rwb.h"
#include "ftl_band.h"
#include "ftl_debug.h"
/* NOTE(review): not referenced in the visible part of this file - confirm it is still needed */
#define FTL_CORE_RING_SIZE 4096
/* NOTE(review): not referenced in the visible part of this file - presumably seconds, confirm */
#define FTL_INIT_TIMEOUT 30
/* NOTE(review): not referenced in the visible part of this file - confirm it is still needed */
#define FTL_NSID 1
/* Number of zone info entries buffered in the init context, i.e. the max zones queried per request */
#define FTL_ZONE_INFO_COUNT 64
/* Dummy bdev module used to claim the base and cache bdevs. */
static struct spdk_bdev_module g_ftl_bdev_module = {
.name = "ftl_lib",
};
/*
 * Completion callback used by device init and free. It receives the device
 * (NULL on the failure and free paths in this file), the caller's argument and
 * a status code (0 on success).
 */
typedef void (*spdk_ftl_init_fn)(struct spdk_ftl_dev *, void *, int);
/*
 * Context carried through the asynchronous device initialization sequence.
 * The same structure is also allocated by _spdk_ftl_dev_free() to track the
 * halt (shutdown) sequence.
 */
struct ftl_dev_init_ctx {
/* Owner */
struct spdk_ftl_dev *dev;
/* Copy of the initial arguments passed to spdk_ftl_dev_init() */
struct spdk_ftl_dev_init_opts opts;
/* Base bdev IO channel used for retrieving zone info */
struct spdk_io_channel *ioch;
/* Buffer for reading zone info (one request fills at most FTL_ZONE_INFO_COUNT entries) */
struct spdk_bdev_zone_info info[FTL_ZONE_INFO_COUNT];
/* Zone id at which the next zone info request starts */
size_t zone_id;
/* User's callback */
spdk_ftl_init_fn cb_fn;
/* Callback's argument */
void *cb_arg;
/* Thread to call the callback on */
struct spdk_thread *thread;
/* Poller to check if the device has been destroyed/initialized */
struct spdk_poller *poller;
/* Status to return for halt completion callback */
int halt_complete_status;
};
/*
 * List of devices: entries are added by ftl_init_complete() and removed by
 * ftl_dev_free_sync(), both while holding g_ftl_queue_lock.
 */
static STAILQ_HEAD(, spdk_ftl_dev) g_ftl_queue = STAILQ_HEAD_INITIALIZER(g_ftl_queue);
/* Protects g_ftl_queue */
static pthread_mutex_t g_ftl_queue_lock = PTHREAD_MUTEX_INITIALIZER;
/*
 * Default configuration. It is copied out by spdk_ftl_conf_init_defaults() and
 * used by spdk_ftl_dev_init() when the caller does not supply a configuration.
 */
static const struct spdk_ftl_conf g_default_conf = {
.limits = {
/* 5 free bands / 0 % host writes */
[SPDK_FTL_LIMIT_CRIT] = { .thld = 5, .limit = 0 },
/* 10 free bands / 5 % host writes */
[SPDK_FTL_LIMIT_HIGH] = { .thld = 10, .limit = 5 },
/* 20 free bands / 40 % host writes */
[SPDK_FTL_LIMIT_LOW] = { .thld = 20, .limit = 40 },
/* 40 free bands / 100 % host writes - defrag starts running */
[SPDK_FTL_LIMIT_START] = { .thld = 40, .limit = 100 },
},
/* 10 percent valid blocks */
.invalid_thld = 10,
/* 20% spare blocks */
.lba_rsvd = 20,
/* 6M write buffer */
.rwb_size = 6 * 1024 * 1024,
/* 90% band fill threshold */
.band_thld = 90,
/* Max 32 IO depth per band relocate */
.max_reloc_qdepth = 32,
/* Max 3 active band relocates */
.max_active_relocs = 3,
/* IO pool size per user thread (this should be adjusted to thread IO qdepth) */
.user_io_pool_size = 2048,
/* Number of interleaving units per ws_opt: */
/* 1 by default, 3 for 3D TLC NAND */
.num_interleave_units = 1,
/*
 * If clear, ftl will return an error when restoring after a dirty shutdown.
 * If set, the last band will be padded and ftl will restore based only on closed
 * bands - this will result in lost data after recovery.
 */
.allow_open_bands = false,
.nv_cache = {
/* Maximum number of concurrent requests */
.max_request_cnt = 2048,
/* Maximum number of blocks per request */
.max_request_size = 16,
}
};
/*
 * Prepare the LBA map metadata of a single band: create the validity bit array
 * sized to the number of blocks in a band, initialize the map's spinlock and
 * clear the band's metadata.
 *
 * Returns 0 on success, -ENOMEM if the bit array could not be created.
 */
static int
ftl_band_init_md(struct ftl_band *band)
{
	struct ftl_lba_map *map = &band->lba_map;

	map->vld = spdk_bit_array_create(ftl_get_num_blocks_in_band(band->dev));
	if (map->vld == NULL) {
		return -ENOMEM;
	}

	pthread_spin_init(&map->lock, PTHREAD_PROCESS_PRIVATE);
	ftl_band_md_clear(band);

	return 0;
}
/*
 * Validate a configuration against the device.
 *
 * dev  - device whose xfer_size is checked against the interleave setting
 * conf - configuration to validate
 *
 * Returns 0 when every check passes, -1 otherwise.
 */
static int
ftl_check_conf(const struct spdk_ftl_dev *dev, const struct spdk_ftl_conf *conf)
{
	size_t i;

	if (conf->invalid_thld >= 100) {
		return -1;
	}

	/* lba_rsvd is a percentage and has to fall strictly between 0 and 100 */
	if (conf->lba_rsvd >= 100) {
		return -1;
	}
	if (conf->lba_rsvd == 0) {
		return -1;
	}

	/* The write buffer has to be non-empty and a whole number of blocks */
	if (conf->rwb_size == 0) {
		return -1;
	}
	if (conf->rwb_size % FTL_BLOCK_SIZE != 0) {
		return -1;
	}

	/* Reject zero up front - it is used as a divisor right below */
	if (conf->num_interleave_units == 0) {
		return -1;
	}
	if (dev->xfer_size % conf->num_interleave_units != 0) {
		return -1;
	}

	/* Each limit is a percentage of host writes */
	for (i = 0; i < SPDK_FTL_LIMIT_MAX; ++i) {
		if (conf->limits[i].limit > 100) {
			return -1;
		}
	}

	return 0;
}
/*
 * Allocate the band array and initialize every band: id, owner, CLOSED state,
 * zone buffer, LBA map metadata and relocation bitmap. Bands are appended to
 * dev->shut_bands in ascending id order.
 *
 * Returns 0 on success and a non-zero value on failure. On failure partially
 * initialized bands are left in place; spdk_ftl_dev_init() relies on
 * ftl_dev_free_sync() to release them.
 */
static int
ftl_dev_init_bands(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band, *pband = NULL;
	unsigned int i;
	int rc = 0;

	LIST_INIT(&dev->free_bands);
	LIST_INIT(&dev->shut_bands);

	dev->num_free = 0;
	dev->bands = calloc(ftl_get_num_bands(dev), sizeof(*dev->bands));
	if (!dev->bands) {
		return -1;
	}

	for (i = 0; i < ftl_get_num_bands(dev); ++i) {
		band = &dev->bands[i];
		band->id = i;
		band->dev = dev;
		band->state = FTL_BAND_STATE_CLOSED;

		/* Keep the list ordered by band id: the first band becomes the */
		/* head, every following one is linked after its predecessor */
		if (LIST_EMPTY(&dev->shut_bands)) {
			LIST_INSERT_HEAD(&dev->shut_bands, band, list_entry);
		} else {
			LIST_INSERT_AFTER(pband, band, list_entry);
		}
		pband = band;

		CIRCLEQ_INIT(&band->zones);
		band->zone_buf = calloc(ftl_get_num_punits(dev), sizeof(*band->zone_buf));
		if (!band->zone_buf) {
			SPDK_ERRLOG("Failed to allocate block state table for band: [%u]\n", i);
			rc = -1;
			break;
		}

		rc = ftl_band_init_md(band);
		if (rc) {
			SPDK_ERRLOG("Failed to initialize metadata structures for band [%u]\n", i);
			break;
		}

		band->reloc_bitmap = spdk_bit_array_create(ftl_get_num_bands(dev));
		if (!band->reloc_bitmap) {
			SPDK_ERRLOG("Failed to allocate band relocation bitmap\n");
			/* rc is still 0 here, so the failure has to be recorded explicitly */
			rc = -1;
			break;
		}
	}

	return rc;
}
/*
 * Event callback passed to spdk_bdev_open_ext() for the bdevs opened in this
 * file. Removal events trip an assertion; media management events trigger
 * retrieval of the media events. All other events are ignored.
 */
static void
ftl_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	struct spdk_ftl_dev *dev = event_ctx;

	if (type == SPDK_BDEV_EVENT_REMOVE) {
		assert(0);
		return;
	}

	if (type == SPDK_BDEV_EVENT_MEDIA_MANAGEMENT) {
		assert(bdev == spdk_bdev_desc_get_bdev(dev->base_bdev_desc));
		ftl_get_media_events(dev);
	}
}
/*
 * Open, claim and validate the bdev used as the non-volatile write buffer
 * cache, then allocate the cache's metadata pool, DMA buffer and lock.
 *
 * dev       - device being initialized
 * bdev_name - name of the cache bdev; NULL means no cache is used
 *
 * Returns 0 on success (or when bdev_name is NULL) and -1 on failure. Once the
 * bdev has been claimed, resources acquired here are not released on the error
 * paths; spdk_ftl_dev_init() relies on ftl_dev_free_sync() for that.
 */
static int
ftl_dev_init_nv_cache(struct spdk_ftl_dev *dev, const char *bdev_name)
{
	struct spdk_bdev *bdev;
	struct spdk_ftl_conf *conf = &dev->conf;
	struct ftl_nv_cache *nv_cache = &dev->nv_cache;
	char pool_name[128];
	int rc;

	if (!bdev_name) {
		return 0;
	}

	bdev = spdk_bdev_get_by_name(bdev_name);
	if (!bdev) {
		SPDK_ERRLOG("Unable to find bdev: %s\n", bdev_name);
		return -1;
	}

	if (spdk_bdev_open_ext(bdev_name, true, ftl_bdev_event_cb,
			       dev, &nv_cache->bdev_desc)) {
		SPDK_ERRLOG("Unable to open bdev: %s\n", bdev_name);
		return -1;
	}

	if (spdk_bdev_module_claim_bdev(bdev, nv_cache->bdev_desc, &g_ftl_bdev_module)) {
		/* Drop the descriptor so that the common cleanup doesn't try to */
		/* release a bdev that was never claimed */
		spdk_bdev_close(nv_cache->bdev_desc);
		nv_cache->bdev_desc = NULL;
		SPDK_ERRLOG("Unable to claim bdev %s\n", bdev_name);
		return -1;
	}

	SPDK_INFOLOG(SPDK_LOG_FTL_INIT, "Using %s as write buffer cache\n",
		     spdk_bdev_get_name(bdev));

	if (spdk_bdev_get_block_size(bdev) != FTL_BLOCK_SIZE) {
		SPDK_ERRLOG("Unsupported block size (%"PRIu32")\n", spdk_bdev_get_block_size(bdev));
		return -1;
	}

	if (!spdk_bdev_is_md_separate(bdev)) {
		SPDK_ERRLOG("Bdev %s doesn't support separate metadata buffer IO\n",
			    spdk_bdev_get_name(bdev));
		return -1;
	}

	if (spdk_bdev_get_md_size(bdev) < sizeof(uint64_t)) {
		SPDK_ERRLOG("Bdev's %s metadata is too small (%"PRIu32")\n",
			    spdk_bdev_get_name(bdev), spdk_bdev_get_md_size(bdev));
		return -1;
	}

	if (spdk_bdev_get_dif_type(bdev) != SPDK_DIF_DISABLE) {
		SPDK_ERRLOG("Unsupported DIF type used by bdev %s\n",
			    spdk_bdev_get_name(bdev));
		return -1;
	}

	/* The cache needs to be capable of storing at least two full bands. This requirement comes
	 * from the fact that cache works as a protection against power loss, so before the data
	 * inside the cache can be overwritten, the band it's stored on has to be closed. Plus one
	 * extra block is needed to store the header.
	 */
	if (spdk_bdev_get_num_blocks(bdev) < ftl_get_num_blocks_in_band(dev) * 2 + 1) {
		SPDK_ERRLOG("Insufficient number of blocks for write buffer cache (available: %"
			    PRIu64", required: %"PRIu64")\n", spdk_bdev_get_num_blocks(bdev),
			    ftl_get_num_blocks_in_band(dev) * 2 + 1);
		return -1;
	}

	/* The device pointer makes the pool name unique per device */
	rc = snprintf(pool_name, sizeof(pool_name), "ftl-nvpool-%p", dev);
	if (rc < 0 || rc >= (int)sizeof(pool_name)) {
		return -1;
	}

	/* Each element holds the metadata for one request of maximum size */
	nv_cache->md_pool = spdk_mempool_create(pool_name, conf->nv_cache.max_request_cnt,
						spdk_bdev_get_md_size(bdev) *
						conf->nv_cache.max_request_size,
						SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
						SPDK_ENV_SOCKET_ID_ANY);
	if (!nv_cache->md_pool) {
		SPDK_ERRLOG("Failed to initialize non-volatile cache metadata pool\n");
		return -1;
	}

	nv_cache->dma_buf = spdk_dma_zmalloc(FTL_BLOCK_SIZE, spdk_bdev_get_buf_align(bdev), NULL);
	if (!nv_cache->dma_buf) {
		SPDK_ERRLOG("Memory allocation failure\n");
		return -1;
	}

	if (pthread_spin_init(&nv_cache->lock, PTHREAD_PROCESS_PRIVATE)) {
		SPDK_ERRLOG("Failed to initialize cache lock\n");
		return -1;
	}

	/* One block is reserved for the header, the rest is available for data */
	nv_cache->current_addr = FTL_NV_CACHE_DATA_OFFSET;
	nv_cache->num_data_blocks = spdk_bdev_get_num_blocks(bdev) - 1;
	nv_cache->num_available = nv_cache->num_data_blocks;
	nv_cache->ready = false;

	return 0;
}
void
spdk_ftl_conf_init_defaults(struct spdk_ftl_conf *conf)
{
*conf = g_default_conf;
}
/*
 * Mempool object constructor for LBA map requests: attaches a segment bit
 * array to each request. The array has one bit per FTL_NUM_LBA_IN_BLOCK blocks
 * of a band (rounded up). The opaque argument is the owning device.
 */
static void
ftl_lba_map_request_ctor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
{
	struct spdk_ftl_dev *dev = opaque;
	struct ftl_lba_map_request *req = obj;

	req->segments = spdk_bit_array_create(
				spdk_divide_round_up(ftl_get_num_blocks_in_band(dev),
						     FTL_NUM_LBA_IN_BLOCK));
}
/*
 * Create the per-device pool of media event objects (1024 elements). The pool
 * name embeds the device pointer. Returns 0 on success, -1 on failure.
 */
static int
ftl_init_media_events_pool(struct spdk_ftl_dev *dev)
{
	char name[128];
	int len;

	len = snprintf(name, sizeof(name), "ftl-media-%p", dev);
	if (len < 0 || len >= (int)sizeof(name)) {
		SPDK_ERRLOG("Failed to create media pool name\n");
		return -1;
	}

	dev->media_events_pool = spdk_mempool_create(name, 1024,
				 sizeof(struct ftl_media_event),
				 SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
				 SPDK_ENV_SOCKET_ID_ANY);
	if (dev->media_events_pool == NULL) {
		SPDK_ERRLOG("Failed to create media events pool\n");
		return -1;
	}

	return 0;
}
/*
 * Create the two LBA map related pools of a device: the pool of LBA map
 * buffers and the pool of LBA map requests. Both pool names are derived from
 * the device name.
 *
 * Returns 0 on success, -ENAMETOOLONG if a pool name does not fit its buffer
 * and -ENOMEM if a pool cannot be created.
 */
static int
ftl_init_lba_map_pools(struct spdk_ftl_dev *dev)
{
#define POOL_NAME_LEN 128
	char name[POOL_NAME_LEN];
	int len;

	len = snprintf(name, sizeof(name), "%s-%s", dev->name, "ftl-lba-pool");
	if (len < 0 || len >= POOL_NAME_LEN) {
		return -ENAMETOOLONG;
	}

	/* We need to reserve at least 2 buffers for band close / open sequence
	 * alone, plus additional (8) buffers for handling write errors.
	 * TODO: This memory pool is utilized only by core thread - it introduces
	 * unnecessary overhead and should be replaced by a different data structure.
	 */
	dev->lba_pool = spdk_mempool_create(name, 2 + 8,
					    ftl_lba_map_pool_elem_size(dev),
					    SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
					    SPDK_ENV_SOCKET_ID_ANY);
	if (dev->lba_pool == NULL) {
		return -ENOMEM;
	}

	len = snprintf(name, sizeof(name), "%s-%s", dev->name, "ftl-lbareq-pool");
	if (len < 0 || len >= POOL_NAME_LEN) {
		return -ENAMETOOLONG;
	}

	/* One request per outstanding relocation IO across all active relocations */
	dev->lba_request_pool = spdk_mempool_create_ctor(name,
				dev->conf.max_reloc_qdepth * dev->conf.max_active_relocs,
				sizeof(struct ftl_lba_map_request),
				SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
				SPDK_ENV_SOCKET_ID_ANY,
				ftl_lba_map_request_ctor,
				dev);
	if (dev->lba_request_pool == NULL) {
		return -ENOMEM;
	}

	return 0;
}
/* Reset the device's write pointer list and both flush lists to empty. */
static void
ftl_init_wptr_list(struct spdk_ftl_dev *dev)
{
	LIST_INIT(&dev->band_flush_list);
	LIST_INIT(&dev->flush_list);
	LIST_INIT(&dev->wptr_list);
}
/*
 * Return the highest sequence number found among the bands on the shut_bands
 * list, or 0 if the list is empty.
 */
static size_t
ftl_dev_band_max_seq(struct spdk_ftl_dev *dev)
{
	size_t max_seq = 0;
	struct ftl_band *cur;

	LIST_FOREACH(cur, &dev->shut_bands, list_entry) {
		if (max_seq < cur->seq) {
			max_seq = cur->seq;
		}
	}

	return max_seq;
}
/*
 * Message handler sent to the core thread by ftl_init_bands_state(). Seeds the
 * device sequence number from the bands, moves every band without valid LBAs
 * to the FREE state and resumes relocation. ctx is the device.
 */
static void
_ftl_init_bands_state(void *ctx)
{
	struct spdk_ftl_dev *dev = ctx;
	struct ftl_band *band, *next;

	dev->seq = ftl_dev_band_max_seq(dev);

	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, next) {
		if (band->lba_map.num_vld != 0) {
			continue;
		}

		ftl_band_set_state(band, FTL_BAND_STATE_FREE);
	}

	ftl_reloc_resume(dev->reloc);

	/* Clear the limit applications as they're incremented incorrectly by */
	/* the initialization code */
	memset(dev->stats.limits, 0, sizeof(dev->stats.limits));
}
/*
 * Count the bands on the shut_bands list that have at least one zone and hold
 * no valid LBAs.
 */
static int
ftl_init_num_free_bands(struct spdk_ftl_dev *dev)
{
	int num_free = 0;
	struct ftl_band *band;

	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
		if (!band->num_zones || band->lba_map.num_vld) {
			continue;
		}

		num_free++;
	}

	return num_free;
}
/*
 * Verify that at least one free band exists and schedule the band state
 * initialization on the core thread.
 *
 * Returns 0 once the message has been sent, -1 if there are no free bands.
 */
static int
ftl_init_bands_state(struct spdk_ftl_dev *dev)
{
	/* TODO: Should we abort initialization or expose read only device */
	/* if there is no free bands? */
	/* If we abort initialization should we depend on condition that */
	/* we have no free bands or should we have some minimal number of */
	/* free bands? */
	if (ftl_init_num_free_bands(dev) == 0) {
		return -1;
	}

	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_init_bands_state, dev);

	return 0;
}
/*
 * Message handler run on the core thread: registers the thread's poller and
 * acquires an IO channel for the device. ctx is the ftl_thread descriptor.
 * A poller registration failure is logged and trips an assertion.
 */
static void
_ftl_dev_init_core_thread(void *ctx)
{
	struct ftl_thread *core = ctx;
	struct spdk_ftl_dev *dev = core->dev;

	core->poller = spdk_poller_register(core->poller_fn, core, core->period_us);
	if (core->poller == NULL) {
		SPDK_ERRLOG("Unable to register poller\n");
		assert(0);
	}

	core->ioch = spdk_get_io_channel(dev);
}
/*
 * Fill in the device's core thread descriptor from the init options and ask
 * that thread to finish its own setup (poller and IO channel).
 *
 * Returns 0 once the message has been sent, -1 if no core thread was given.
 */
static int
ftl_dev_init_core_thread(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
{
	struct ftl_thread *core = &dev->core_thread;

	if (opts->core_thread == NULL) {
		return -1;
	}

	core->dev = dev;
	core->thread = opts->core_thread;
	core->poller_fn = ftl_task_core;
	core->period_us = 0;

	spdk_thread_send_msg(opts->core_thread, _ftl_dev_init_core_thread, core);

	return 0;
}
/*
 * Release the IO channel held by an FTL thread descriptor and clear its thread
 * and channel pointers. The thread's poller must already be unregistered.
 */
static void
ftl_dev_free_thread(struct spdk_ftl_dev *dev, struct ftl_thread *thread)
{
	assert(thread->poller == NULL);

	spdk_put_io_channel(thread->ioch);
	thread->ioch = NULL;
	thread->thread = NULL;
}
/*
 * Allocate the L2P table and mark every entry invalid by filling the table
 * with FTL_ADDR_INVALID via memset(). Entries are 8 bytes wide when the
 * address length is at least 32 bits and 4 bytes wide otherwise.
 *
 * Returns 0 on success, -1 if the table size is zero, the table already exists
 * or the allocation fails.
 */
static int
ftl_dev_l2p_alloc(struct spdk_ftl_dev *dev)
{
	size_t entry_size, table_size;

	if (dev->num_lbas == 0) {
		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Invalid l2p table size\n");
		return -1;
	}

	if (dev->l2p != NULL) {
		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "L2p table already allocated\n");
		return -1;
	}

	entry_size = (dev->addr_len < 32) ? 4 : 8;
	table_size = dev->num_lbas * entry_size;

	dev->l2p = malloc(table_size);
	if (dev->l2p == NULL) {
		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Failed to allocate l2p table\n");
		return -1;
	}

	memset(dev->l2p, FTL_ADDR_INVALID, table_size);

	return 0;
}
/*
 * Free an init/fini context, releasing its IO channel first if it holds one.
 * Passing NULL is a no-op.
 */
static void
ftl_dev_free_init_ctx(struct ftl_dev_init_ctx *init_ctx)
{
	if (init_ctx != NULL) {
		if (init_ctx->ioch != NULL) {
			spdk_put_io_channel(init_ctx->ioch);
		}

		free(init_ctx);
	}
}
/*
 * Message handler reporting successful initialization: invokes the user's
 * callback (if any) with the device and status 0, then frees the init context.
 */
static void
ftl_call_init_complete_cb(void *ctx)
{
	struct ftl_dev_init_ctx *init_ctx = ctx;

	if (init_ctx->cb_fn) {
		init_ctx->cb_fn(init_ctx->dev, init_ctx->cb_arg, 0);
	}

	ftl_dev_free_init_ctx(init_ctx);
}
/*
 * Finish a successful initialization: publish the device on the global device
 * list, mark it initialized and schedule the user's callback on the thread
 * recorded in the init context.
 */
static void
ftl_init_complete(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *ftl_dev = init_ctx->dev;

	pthread_mutex_lock(&g_ftl_queue_lock);
	STAILQ_INSERT_HEAD(&g_ftl_queue, ftl_dev, stailq);
	pthread_mutex_unlock(&g_ftl_queue_lock);

	ftl_dev->initialized = 1;

	spdk_thread_send_msg(init_ctx->thread, ftl_call_init_complete_cb, init_ctx);
}
static void
ftl_init_fail_cb(struct spdk_ftl_dev *dev, void *ctx, int status)
{
struct ftl_dev_init_ctx *init_ctx = ctx;
if (init_ctx->cb_fn != NULL) {
init_ctx->cb_fn(NULL, init_ctx->cb_arg, -ENODEV);
}
ftl_dev_free_init_ctx(init_ctx);
}
static int _spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg,
			      struct spdk_thread *thread);

/*
 * Abort an initialization in progress by tearing the device down. The user is
 * notified from ftl_init_fail_cb() on the thread recorded in the init context.
 * Failure to start the teardown is logged and trips an assertion.
 */
static void
ftl_init_fail(struct ftl_dev_init_ctx *init_ctx)
{
	int rc;

	rc = _spdk_ftl_dev_free(init_ctx->dev, ftl_init_fail_cb, init_ctx, init_ctx->thread);
	if (rc != 0) {
		SPDK_ERRLOG("Unable to free the device\n");
		assert(0);
	}
}
/*
 * Completion of the non-volatile cache header write issued during device
 * creation. On success the cache is marked ready and initialization completes;
 * on failure initialization is aborted. cb_arg is the init context.
 */
static void
ftl_write_nv_cache_md_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;

	spdk_bdev_free_io(bdev_io);

	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("Writing non-volatile cache's metadata header failed\n");
		ftl_init_fail(init_ctx);
		return;
	}

	init_ctx->dev->nv_cache.ready = true;
	ftl_init_complete(init_ctx);
}
/*
 * Completion of the non-volatile cache scrub. On success the cache phase is
 * set to 1 and the cache header is written; any failure aborts the
 * initialization. cb_arg is the init context.
 */
static void
ftl_clear_nv_cache_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;
	struct ftl_nv_cache *cache = &init_ctx->dev->nv_cache;
	int rc;

	spdk_bdev_free_io(bdev_io);

	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("Unable to clear the non-volatile cache bdev\n");
		ftl_init_fail(init_ctx);
		return;
	}

	cache->phase = 1;

	rc = ftl_nv_cache_write_header(cache, false, ftl_write_nv_cache_md_cb, init_ctx);
	if (rc) {
		SPDK_ERRLOG("Unable to write non-volatile cache metadata header\n");
		ftl_init_fail(init_ctx);
	}
}
/*
 * Message handler sent to the core thread during device creation: starts
 * scrubbing the non-volatile cache. ctx is the init context; a failure to
 * start the scrub aborts the initialization.
 */
static void
_ftl_nv_cache_scrub(void *ctx)
{
	struct ftl_dev_init_ctx *init_ctx = ctx;
	int rc;

	rc = ftl_nv_cache_scrub(&init_ctx->dev->nv_cache, ftl_clear_nv_cache_cb, init_ctx);
	if (spdk_likely(rc == 0)) {
		return;
	}

	SPDK_ERRLOG("Unable to clear the non-volatile cache bdev: %s\n",
		    spdk_strerror(-rc));
	ftl_init_fail(init_ctx);
}
/*
 * Set up a freshly created device: generate its UUID, compute the number of
 * user-visible LBAs (usable blocks of all bands minus the reserved
 * percentage), allocate the L2P and initialize the band states. When a
 * non-volatile cache is present it is scrubbed on the core thread before
 * initialization completes; otherwise initialization completes right away.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
ftl_setup_initial_state(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;
	struct spdk_ftl_conf *conf = &dev->conf;
	size_t band_idx;

	spdk_uuid_generate(&dev->uuid);

	dev->num_lbas = 0;
	for (band_idx = 0; band_idx < ftl_get_num_bands(dev); ++band_idx) {
		dev->num_lbas += ftl_band_num_usable_blocks(&dev->bands[band_idx]);
	}

	/* Keep lba_rsvd percent of the blocks back as spare space */
	dev->num_lbas = (dev->num_lbas * (100 - conf->lba_rsvd)) / 100;

	if (ftl_dev_l2p_alloc(dev) != 0) {
		SPDK_ERRLOG("Unable to init l2p table\n");
		return -1;
	}

	if (ftl_init_bands_state(dev) != 0) {
		SPDK_ERRLOG("Unable to finish the initialization\n");
		return -1;
	}

	if (ftl_dev_has_nv_cache(dev)) {
		spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_nv_cache_scrub, init_ctx);
	} else {
		ftl_init_complete(init_ctx);
	}

	return 0;
}
/*
 * Completion of the non-volatile cache restore: completes the initialization
 * on success and aborts it otherwise. cb_arg is the init context.
 */
static void
ftl_restore_nv_cache_cb(struct ftl_restore *restore, int status, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;

	if (spdk_likely(status == 0)) {
		ftl_init_complete(init_ctx);
		return;
	}

	SPDK_ERRLOG("Failed to restore the non-volatile cache state\n");
	ftl_init_fail(init_ctx);
}
/*
 * Completion of the device restore. On success the band states are
 * initialized and then either the non-volatile cache restore is started (when
 * a cache is present) or the initialization completes. Any failure aborts the
 * initialization. cb_arg is the init context.
 */
static void
ftl_restore_device_cb(struct ftl_restore *restore, int status, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;
	struct spdk_ftl_dev *dev = init_ctx->dev;

	if (status != 0) {
		SPDK_ERRLOG("Failed to restore the device from the SSD\n");
		ftl_init_fail(init_ctx);
		return;
	}

	if (ftl_init_bands_state(dev) != 0) {
		SPDK_ERRLOG("Unable to finish the initialization\n");
		ftl_init_fail(init_ctx);
		return;
	}

	if (ftl_dev_has_nv_cache(dev)) {
		ftl_restore_nv_cache(restore, ftl_restore_nv_cache_cb, init_ctx);
	} else {
		ftl_init_complete(init_ctx);
	}
}
/*
 * Completion of the metadata restore. On success the L2P is allocated and the
 * device restore is started; any failure aborts the initialization. cb_arg is
 * the init context.
 */
static void
ftl_restore_md_cb(struct ftl_restore *restore, int status, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;

	if (status != 0) {
		SPDK_ERRLOG("Failed to restore the metadata from the SSD\n");
		ftl_init_fail(init_ctx);
		return;
	}

	/* After the metadata is read it should be possible to allocate the L2P */
	if (ftl_dev_l2p_alloc(init_ctx->dev) != 0) {
		SPDK_ERRLOG("Failed to allocate the L2P\n");
		ftl_init_fail(init_ctx);
		return;
	}

	if (ftl_restore_device(restore, ftl_restore_device_cb, init_ctx) != 0) {
		SPDK_ERRLOG("Failed to start device restoration from the SSD\n");
		ftl_init_fail(init_ctx);
	}
}
/*
 * Start restoring an existing device: adopt the UUID from the init options and
 * kick off the metadata restore.
 *
 * Returns 0 if the restore was started, -1 otherwise.
 */
static int
ftl_restore_state(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;
	int rc;

	dev->uuid = init_ctx->opts.uuid;

	rc = ftl_restore_md(dev, ftl_restore_md_cb, init_ctx);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to start metadata restoration from the SSD\n");
		return -1;
	}

	return 0;
}
/*
 * Compute the tail metadata address of every band and drop bands that have no
 * zones from the shut_bands list, decrementing the device's band count for
 * each band dropped.
 */
static void
ftl_dev_update_bands(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band, *next;
	size_t idx;

	for (idx = 0; idx < ftl_get_num_bands(dev); ++idx) {
		dev->bands[idx].tail_md_addr = ftl_band_tail_md_addr(&dev->bands[idx]);
	}

	/* Remove band from shut_bands list to prevent further processing */
	/* if all blocks on this band are bad */
	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, next) {
		if (band->num_zones) {
			continue;
		}

		dev->num_bands--;
		LIST_REMOVE(band, list_entry);
	}
}
/*
 * Called once all zone information has been read: finalizes the band list,
 * starts the core thread and then either creates a fresh device state or
 * restores it, depending on SPDK_FTL_MODE_CREATE in the init options. Any
 * failure aborts the initialization.
 */
static void
ftl_dev_init_state(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;

	ftl_dev_update_bands(dev);

	if (ftl_dev_init_core_thread(dev, &init_ctx->opts) != 0) {
		SPDK_ERRLOG("Unable to initialize device thread\n");
		ftl_init_fail(init_ctx);
		return;
	}

	if (init_ctx->opts.mode & SPDK_FTL_MODE_CREATE) {
		if (ftl_setup_initial_state(init_ctx) == 0) {
			return;
		}

		SPDK_ERRLOG("Failed to setup initial state of the device\n");
	} else {
		if (ftl_restore_state(init_ctx) == 0) {
			return;
		}

		SPDK_ERRLOG("Unable to restore device's state from the SSD\n");
	}

	ftl_init_fail(init_ctx);
}
static void ftl_dev_get_zone_info(struct ftl_dev_init_ctx *init_ctx);

/*
 * Completion of a zone info request. Copies every zone descriptor returned in
 * init_ctx->info into the matching band/parallel-unit slot, marks zones whose
 * capacity differs from the zone size as offline, links all remaining
 * non-offline zones into their band and then requests the next batch.
 *
 * bdev_io - completed IO, released here
 * success - whether the zone info request succeeded; failure aborts the init
 * cb_arg  - init context
 */
static void
ftl_dev_get_zone_info_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;
	struct spdk_ftl_dev *dev = init_ctx->dev;
	struct ftl_band *band;
	struct ftl_zone *zone;
	struct ftl_addr addr;
	size_t i, zones_left, num_zones;

	spdk_bdev_free_io(bdev_io);

	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("Unable to read zone info for zone id: %"PRIu64"\n", init_ctx->zone_id);
		ftl_init_fail(init_ctx);
		return;
	}

	/* Recompute the size of the batch that was requested for this completion */
	zones_left = ftl_get_num_zones(dev) - (init_ctx->zone_id / ftl_get_num_blocks_in_zone(dev));
	num_zones = spdk_min(zones_left, FTL_ZONE_INFO_COUNT);

	for (i = 0; i < num_zones; ++i) {
		addr.offset = init_ctx->info[i].zone_id;
		band = &dev->bands[ftl_addr_get_band(dev, addr)];
		zone = &band->zone_buf[ftl_addr_get_punit(dev, addr)];
		zone->info = init_ctx->info[i];

		/* TODO: add support for zone capacity less than zone size */
		if (zone->info.capacity != ftl_get_num_blocks_in_zone(dev)) {
			zone->info.state = SPDK_BDEV_ZONE_STATE_OFFLINE;
			/* Report the offending zone rather than the start of the batch */
			SPDK_ERRLOG("Zone capacity is not equal zone size for "
				    "zone id: %"PRIu64"\n", zone->info.zone_id);
		}

		if (zone->info.state != SPDK_BDEV_ZONE_STATE_OFFLINE) {
			band->num_zones++;
			CIRCLEQ_INSERT_TAIL(&band->zones, zone, circleq);
		}
	}

	/* Advance to the first zone of the next batch */
	init_ctx->zone_id = init_ctx->zone_id + num_zones * ftl_get_num_blocks_in_zone(dev);

	ftl_dev_get_zone_info(init_ctx);
}
/*
 * Request the next batch (at most FTL_ZONE_INFO_COUNT entries) of zone
 * descriptors starting at init_ctx->zone_id. Once no zones are left the
 * initialization moves on to ftl_dev_init_state(). A failure to submit the
 * request aborts the initialization.
 */
static void
ftl_dev_get_zone_info(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;
	size_t remaining, batch;
	int rc;

	remaining = ftl_get_num_zones(dev) - (init_ctx->zone_id / ftl_get_num_blocks_in_zone(dev));
	if (remaining == 0) {
		ftl_dev_init_state(init_ctx);
		return;
	}

	batch = spdk_min(remaining, FTL_ZONE_INFO_COUNT);

	rc = spdk_bdev_get_zone_info(dev->base_bdev_desc, init_ctx->ioch,
				     init_ctx->zone_id, batch, init_ctx->info,
				     ftl_dev_get_zone_info_cb, init_ctx);
	if (spdk_likely(rc == 0)) {
		return;
	}

	SPDK_ERRLOG("Unable to read zone info for zone id: %"PRIu64"\n", init_ctx->zone_id);
	ftl_init_fail(init_ctx);
}
/*
 * Begin reading zone information from the base bdev, starting at zone id 0.
 * The base bdev IO channel obtained here is kept in the init context.
 *
 * Returns 0 once the first request has been attempted, -1 if the IO channel
 * could not be obtained.
 */
static int
ftl_dev_init_zones(struct ftl_dev_init_ctx *init_ctx)
{
	init_ctx->zone_id = 0;

	init_ctx->ioch = spdk_bdev_get_io_channel(init_ctx->dev->base_bdev_desc);
	if (init_ctx->ioch == NULL) {
		SPDK_ERRLOG("Failed to get base bdev IO channel\n");
		return -1;
	}

	ftl_dev_get_zone_info(init_ctx);

	return 0;
}
/*
 * IO channel constructor registered with spdk_io_device_register(). Creates
 * the channel's IO pool, obtains the base (and, when a cache is configured,
 * the cache) bdev IO channels, initializes the channel queues and registers
 * its poller.
 *
 * io_device - the FTL device
 * ctx       - the channel context (struct ftl_io_channel)
 *
 * Returns 0 on success, -1 on failure with everything acquired so far released.
 */
static int
ftl_io_channel_create_cb(void *io_device, void *ctx)
{
	struct ftl_io_channel *ioch = ctx;
	struct spdk_ftl_dev *dev = io_device;
	char pool_name[32];
	int len;

	/* The channel pointer makes the pool name unique per channel */
	len = snprintf(pool_name, sizeof(pool_name), "ftl_io_%p", ioch);
	if (len < 0 || len >= (int)sizeof(pool_name)) {
		SPDK_ERRLOG("Failed to create IO channel pool name\n");
		return -1;
	}

	ioch->dev = dev;
	ioch->cache_ioch = NULL;
	ioch->elem_size = sizeof(struct ftl_md_io);

	ioch->io_pool = spdk_mempool_create(pool_name,
					    dev->conf.user_io_pool_size,
					    ioch->elem_size,
					    0,
					    SPDK_ENV_SOCKET_ID_ANY);
	if (ioch->io_pool == NULL) {
		SPDK_ERRLOG("Failed to create IO channel's IO pool\n");
		return -1;
	}

	ioch->base_ioch = spdk_bdev_get_io_channel(dev->base_bdev_desc);
	if (ioch->base_ioch == NULL) {
		SPDK_ERRLOG("Failed to create base bdev IO channel\n");
		goto free_pool;
	}

	if (ftl_dev_has_nv_cache(dev)) {
		ioch->cache_ioch = spdk_bdev_get_io_channel(dev->nv_cache.bdev_desc);
		if (ioch->cache_ioch == NULL) {
			SPDK_ERRLOG("Failed to create cache IO channel\n");
			goto put_base;
		}
	}

	TAILQ_INIT(&ioch->write_cmpl_queue);
	TAILQ_INIT(&ioch->retry_queue);

	ioch->poller = spdk_poller_register(ftl_io_channel_poll, ioch, 0);
	if (ioch->poller != NULL) {
		return 0;
	}

	SPDK_ERRLOG("Failed to register IO channel poller\n");

	/* Unwind in reverse order of acquisition */
	if (ioch->cache_ioch != NULL) {
		spdk_put_io_channel(ioch->cache_ioch);
	}
put_base:
	spdk_put_io_channel(ioch->base_ioch);
free_pool:
	spdk_mempool_free(ioch->io_pool);
	return -1;
}
/*
 * IO channel destructor registered with spdk_io_device_register(): stops the
 * channel's poller, frees its IO pool and releases the base and (if present)
 * cache bdev IO channels. ctx is the channel context.
 */
static void
ftl_io_channel_destroy_cb(void *io_device, void *ctx)
{
	struct ftl_io_channel *chan = ctx;

	spdk_poller_unregister(&chan->poller);
	spdk_mempool_free(chan->io_pool);
	spdk_put_io_channel(chan->base_ioch);

	if (chan->cache_ioch != NULL) {
		spdk_put_io_channel(chan->cache_ioch);
	}
}
/*
 * Register the device as an SPDK IO device so that per-thread FTL IO channels
 * can be created for it. Always returns 0.
 */
static int
ftl_dev_init_io_channel(struct spdk_ftl_dev *dev)
{
	spdk_io_device_register(dev,
				ftl_io_channel_create_cb,
				ftl_io_channel_destroy_cb,
				sizeof(struct ftl_io_channel),
				NULL);

	return 0;
}
/*
 * Open, claim and validate the zoned bdev the device is built on, and derive
 * the device's transfer size, metadata size, band count and address length
 * from it.
 *
 * Returns 0 on success, -1 on failure. Once the bdev has been claimed, it is
 * not released on the error paths; spdk_ftl_dev_init() relies on
 * ftl_dev_free_sync() for that.
 */
static int
ftl_dev_init_base_bdev(struct spdk_ftl_dev *dev, const char *bdev_name)
{
	struct spdk_bdev *base;
	uint64_t total_blocks;
	uint32_t blk_size;

	base = spdk_bdev_get_by_name(bdev_name);
	if (base == NULL) {
		SPDK_ERRLOG("Unable to find bdev: %s\n", bdev_name);
		return -1;
	}

	if (!spdk_bdev_is_zoned(base)) {
		SPDK_ERRLOG("Bdev dosen't support zone capabilities: %s\n",
			    spdk_bdev_get_name(base));
		return -1;
	}

	if (spdk_bdev_open_ext(bdev_name, true, ftl_bdev_event_cb,
			       dev, &dev->base_bdev_desc) != 0) {
		SPDK_ERRLOG("Unable to open bdev: %s\n", bdev_name);
		return -1;
	}

	if (spdk_bdev_module_claim_bdev(base, dev->base_bdev_desc, &g_ftl_bdev_module) != 0) {
		/* Drop the descriptor so that the common cleanup doesn't try to */
		/* release a bdev that was never claimed */
		spdk_bdev_close(dev->base_bdev_desc);
		dev->base_bdev_desc = NULL;
		SPDK_ERRLOG("Unable to claim bdev %s\n", bdev_name);
		return -1;
	}

	dev->xfer_size = spdk_bdev_get_write_unit_size(base);
	dev->md_size = spdk_bdev_get_md_size(base);

	blk_size = spdk_bdev_get_block_size(base);
	if (blk_size != FTL_BLOCK_SIZE) {
		SPDK_ERRLOG("Unsupported block size (%"PRIu32")\n", blk_size);
		return -1;
	}

	total_blocks = spdk_bdev_get_num_blocks(base);
	if (total_blocks % ftl_get_num_punits(dev) != 0) {
		SPDK_ERRLOG("Unsupported geometry. Base bdev block count must be multiple "
			    "of optimal number of zones.\n");
		return -1;
	}

	if (ftl_is_append_supported(dev) &&
	    !spdk_bdev_io_type_supported(base, SPDK_BDEV_IO_TYPE_ZONE_APPEND)) {
		SPDK_ERRLOG("Bdev dosen't support append: %s\n",
			    spdk_bdev_get_name(base));
		return -1;
	}

	/* A band spans one zone on every parallel unit */
	dev->num_bands = total_blocks / (ftl_get_num_punits(dev) * ftl_get_num_blocks_in_zone(dev));
	dev->addr_len = spdk_u64log2(total_blocks) + 1;

	return 0;
}
/*
 * Mempool object iterator callback used on teardown: frees the segment bit
 * array attached to an LBA map request by its constructor.
 */
static void
ftl_lba_map_request_dtor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
{
	struct ftl_lba_map_request *req = obj;

	spdk_bit_array_free(&req->segments);
}
/*
 * Release the module claim on the bdev behind the descriptor and close the
 * descriptor. A NULL descriptor is ignored.
 */
static void
ftl_release_bdev(struct spdk_bdev_desc *bdev_desc)
{
	if (bdev_desc != NULL) {
		spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(bdev_desc));
		spdk_bdev_close(bdev_desc);
	}
}
static void
ftl_dev_free_sync(struct spdk_ftl_dev *dev)
{
struct spdk_ftl_dev *iter;
size_t i;
if (!dev) {
return;
}
pthread_mutex_lock(&g_ftl_queue_lock);
STAILQ_FOREACH(iter, &g_ftl_queue, stailq) {
if (iter == dev) {
STAILQ_REMOVE(&g_ftl_queue, dev, spdk_ftl_dev, stailq);
break;
}
}
pthread_mutex_unlock(&g_ftl_queue_lock);
assert(LIST_EMPTY(&dev->wptr_list));
assert(ftl_rwb_num_acquired(dev->rwb, FTL_RWB_TYPE_INTERNAL) == 0);
assert(ftl_rwb_num_acquired(dev->rwb, FTL_RWB_TYPE_USER) == 0);
ftl_dev_dump_bands(dev);
ftl_dev_dump_stats(dev);
spdk_io_device_unregister(dev, NULL);
if (dev->core_thread.thread) {
ftl_dev_free_thread(dev, &dev->core_thread);
}
if (dev->bands) {
for (i = 0; i < ftl_get_num_bands(dev); ++i) {
free(dev->bands[i].zone_buf);
spdk_bit_array_free(&dev->bands[i].lba_map.vld);
spdk_bit_array_free(&dev->bands[i].reloc_bitmap);
}
}
spdk_dma_free(dev->nv_cache.dma_buf);
spdk_mempool_free(dev->lba_pool);
spdk_mempool_free(dev->nv_cache.md_pool);
spdk_mempool_free(dev->media_events_pool);
if (dev->lba_request_pool) {
spdk_mempool_obj_iter(dev->lba_request_pool, ftl_lba_map_request_dtor, NULL);
}
spdk_mempool_free(dev->lba_request_pool);
ftl_rwb_free(dev->rwb);
ftl_reloc_free(dev->reloc);
ftl_release_bdev(dev->nv_cache.bdev_desc);
ftl_release_bdev(dev->base_bdev_desc);
free(dev->name);
free(dev->bands);
free(dev->l2p);
free(dev);
}
/*
 * Create an FTL device on top of a zoned base bdev (and, optionally, a cache
 * bdev) and start its asynchronous initialization.
 *
 * _opts  - initialization options; a NULL conf selects the default
 *          configuration, name and base_bdev are mandatory
 * cb_fn  - callback invoked once the asynchronous part finishes (may be NULL)
 * cb_arg - argument passed to cb_fn
 *
 * Returns 0 if the asynchronous initialization was started, in which case the
 * final result is reported through cb_fn on the calling thread. Returns a
 * negative errno value if the synchronous part failed; cb_fn is not invoked
 * then.
 */
int
spdk_ftl_dev_init(const struct spdk_ftl_dev_init_opts *_opts, spdk_ftl_init_fn cb_fn, void *cb_arg)
{
	struct spdk_ftl_dev *dev;
	struct spdk_ftl_dev_init_opts opts = *_opts;
	struct ftl_dev_init_ctx *init_ctx = NULL;
	int rc = -ENOMEM;

	dev = calloc(1, sizeof(*dev));
	if (!dev) {
		return -ENOMEM;
	}

	init_ctx = calloc(1, sizeof(*init_ctx));
	if (!init_ctx) {
		goto fail_sync;
	}

	init_ctx->dev = dev;
	init_ctx->opts = *_opts;
	init_ctx->cb_fn = cb_fn;
	init_ctx->cb_arg = cb_arg;
	init_ctx->thread = spdk_get_thread();

	if (!opts.conf) {
		opts.conf = &g_default_conf;
	}

	if (!opts.base_bdev) {
		SPDK_ERRLOG("Lack of underlying device in configuration\n");
		rc = -EINVAL;
		goto fail_sync;
	}

	/* The name is duplicated below and strdup() must not be given NULL */
	if (!opts.name) {
		SPDK_ERRLOG("Lack of device name in configuration\n");
		rc = -EINVAL;
		goto fail_sync;
	}

	dev->conf = *opts.conf;
	dev->limit = SPDK_FTL_LIMIT_MAX;

	dev->name = strdup(opts.name);
	if (!dev->name) {
		SPDK_ERRLOG("Unable to set device name\n");
		goto fail_sync;
	}

	if (ftl_dev_init_base_bdev(dev, opts.base_bdev)) {
		SPDK_ERRLOG("Unsupported underlying device\n");
		goto fail_sync;
	}

	/* In case of errors, we free all of the memory in ftl_dev_free_sync(), */
	/* so we don't have to clean up in each of the init functions. */
	if (ftl_check_conf(dev, opts.conf)) {
		SPDK_ERRLOG("Invalid device configuration\n");
		goto fail_sync;
	}

	if (ftl_init_lba_map_pools(dev)) {
		SPDK_ERRLOG("Unable to init LBA map pools\n");
		goto fail_sync;
	}

	if (ftl_init_media_events_pool(dev)) {
		SPDK_ERRLOG("Unable to init media events pools\n");
		goto fail_sync;
	}

	ftl_init_wptr_list(dev);

	if (ftl_dev_init_bands(dev)) {
		SPDK_ERRLOG("Unable to initialize band array\n");
		goto fail_sync;
	}

	if (ftl_dev_init_nv_cache(dev, opts.cache_bdev)) {
		SPDK_ERRLOG("Unable to initialize persistent cache\n");
		goto fail_sync;
	}

	dev->rwb = ftl_rwb_init(&dev->conf, dev->xfer_size, dev->md_size, ftl_get_num_punits(dev));
	if (!dev->rwb) {
		SPDK_ERRLOG("Unable to initialize rwb structures\n");
		goto fail_sync;
	}

	dev->reloc = ftl_reloc_init(dev);
	if (!dev->reloc) {
		SPDK_ERRLOG("Unable to initialize reloc structures\n");
		goto fail_sync;
	}

	if (ftl_dev_init_io_channel(dev)) {
		SPDK_ERRLOG("Unable to initialize IO channels\n");
		goto fail_sync;
	}

	/* From here on the initialization continues asynchronously and its */
	/* result is delivered through the user's callback */
	if (ftl_dev_init_zones(init_ctx)) {
		SPDK_ERRLOG("Failed to initialize zones\n");
		goto fail_async;
	}

	return 0;
fail_sync:
	ftl_dev_free_sync(dev);
	ftl_dev_free_init_ctx(init_ctx);
	return rc;
fail_async:
	ftl_init_fail(init_ctx);
	return 0;
}
/* Halt the relocation of the device passed as arg. */
static void
_ftl_halt_defrag(void *arg)
{
	struct spdk_ftl_dev *dev = arg;

	ftl_reloc_halt(dev->reloc);
}
static void
ftl_halt_complete_cb(void *ctx)
{
struct ftl_dev_init_ctx *fini_ctx = ctx;
ftl_dev_free_sync(fini_ctx->dev);
if (fini_ctx->cb_fn != NULL) {
fini_ctx->cb_fn(NULL, fini_ctx->cb_arg, fini_ctx->halt_complete_status);
}
ftl_dev_free_init_ctx(fini_ctx);
}
/*
 * Completion of the non-volatile cache header write issued during halt.
 * Records 0 or -EIO as the halt status and hands over to
 * ftl_halt_complete_cb() on the thread recorded in the fini context.
 */
static void
ftl_nv_cache_header_fini_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_dev_init_ctx *fini_ctx = cb_arg;

	spdk_bdev_free_io(bdev_io);

	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("Failed to write non-volatile cache metadata header\n");
		fini_ctx->halt_complete_status = -EIO;
	} else {
		fini_ctx->halt_complete_status = 0;
	}

	spdk_thread_send_msg(fini_ctx->thread, ftl_halt_complete_cb, fini_ctx);
}
/*
 * Poller waiting for the core thread's poller to go away during halt. Once it
 * is gone this poller unregisters itself and either writes the non-volatile
 * cache header (when a cache is present) or completes the halt directly.
 *
 * ctx is the fini context. Always returns 0.
 */
static int
ftl_halt_poller(void *ctx)
{
	struct ftl_dev_init_ctx *fini_ctx = ctx;
	struct spdk_ftl_dev *dev = fini_ctx->dev;
	int rc;

	/* The core thread is still running - check again on the next iteration */
	if (dev->core_thread.poller) {
		return 0;
	}

	spdk_poller_unregister(&fini_ctx->poller);

	if (ftl_dev_has_nv_cache(dev)) {
		rc = ftl_nv_cache_write_header(&dev->nv_cache, true,
					       ftl_nv_cache_header_fini_cb, fini_ctx);
		if (spdk_likely(rc == 0)) {
			/* The halt is completed from ftl_nv_cache_header_fini_cb() */
			return 0;
		}

		/* The write was not submitted, so (presumably, as with the other */
		/* users of this call in this file) its callback will not run. */
		/* Complete the halt here with the error instead of hanging. */
		SPDK_ERRLOG("Unable to write non-volatile cache metadata header\n");
		fini_ctx->halt_complete_status = rc;
	} else {
		fini_ctx->halt_complete_status = 0;
	}

	spdk_thread_send_msg(fini_ctx->thread, ftl_halt_complete_cb, fini_ctx);

	return 0;
}
/*
 * Message handler sent to the core thread by _spdk_ftl_dev_free(): flags the
 * device as halting, halts relocation and registers the poller (100 us period)
 * that waits for the halt to finish. ctx is the fini context.
 */
static void
ftl_add_halt_poller(void *ctx)
{
	struct ftl_dev_init_ctx *fini_ctx = ctx;

	fini_ctx->dev->halt = 1;
	_ftl_halt_defrag(fini_ctx->dev);

	assert(!fini_ctx->poller);
	fini_ctx->poller = spdk_poller_register(ftl_halt_poller, fini_ctx, 100);
}
/*
 * Start the asynchronous halt and release of a device.
 *
 * dev    - device to free
 * cb_fn  - callback invoked when the device has been freed (may be NULL)
 * cb_arg - argument passed to cb_fn
 * thread - thread on which the completion is delivered
 *
 * Returns 0 if the halt sequence was started, -EBUSY if it had already been
 * started for this device and -ENOMEM if the context could not be allocated.
 */
static int
_spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg,
		   struct spdk_thread *thread)
{
	struct ftl_dev_init_ctx *fini_ctx;

	if (dev->halt_started) {
		return -EBUSY;
	}

	fini_ctx = calloc(1, sizeof(*fini_ctx));
	if (!fini_ctx) {
		return -ENOMEM;
	}

	/* Mark the halt as started only once it is certain to proceed, so */
	/* that a failed allocation above doesn't block later attempts */
	dev->halt_started = true;

	fini_ctx->dev = dev;
	fini_ctx->cb_fn = cb_fn;
	fini_ctx->cb_arg = cb_arg;
	fini_ctx->thread = thread;

	ftl_rwb_disable_interleaving(dev->rwb);

	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_add_halt_poller, fini_ctx);

	return 0;
}
/*
 * Halt and free a device. The completion callback is delivered on the calling
 * thread. Returns the result of _spdk_ftl_dev_free().
 */
int
spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg)
{
	struct spdk_thread *caller = spdk_get_thread();

	return _spdk_ftl_dev_free(dev, cb_fn, cb_arg, caller);
}
/* Registers the "ftl_init" log component behind the SPDK_LOG_FTL_INIT flag used by this file's logs */
SPDK_LOG_REGISTER_COMPONENT("ftl_init", SPDK_LOG_FTL_INIT)