bdev: create one module channel per io_device

The bdev layer was recently refactored to share internal
module channels across *all* bdevs of the same bdev
module. As a result, I/O to one bdev can fail with ENOMEM
whenever another bdev of the same module (nvme, split,
etc.) is fully saturated. This is not the behavior we
want, as these bdevs may have nothing in common.

This partially reverts commit
e433001 (bdev: Put 3 types of channels in a hierarchy)
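
The fix: each bdev channel now looks up a module channel
on its management channel's per-thread list, keyed by the
module's I/O channel. A hit takes a reference; a miss
allocates. Reduced to a self-contained sketch (plain
sys/queue.h; struct resource, resource_get() and
resource_put() are hypothetical names, not SPDK API):

/* Minimal sketch of the lookup-or-create refcounting used by this
 * commit. Compile with: cc -o sketch sketch.c */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct resource {
	void *key;                  /* stands in for the module's I/O channel */
	unsigned ref;               /* one reference per bdev channel */
	TAILQ_ENTRY(resource) link;
};

static TAILQ_HEAD(, resource) g_resources = TAILQ_HEAD_INITIALIZER(g_resources);

/* Look up a shared resource by key; create it on first use. */
static struct resource *
resource_get(void *key)
{
	struct resource *res;

	TAILQ_FOREACH(res, &g_resources, link) {
		if (res->key == key) {
			res->ref++;  /* share the existing resource */
			return res;
		}
	}

	res = calloc(1, sizeof(*res));
	if (res == NULL) {
		return NULL;
	}
	res->key = key;
	res->ref = 1;
	TAILQ_INSERT_TAIL(&g_resources, res, link);
	return res;
}

/* Drop one reference; free the resource when the last user is gone. */
static void
resource_put(struct resource *res)
{
	assert(res->ref > 0);
	if (--res->ref == 0) {
		TAILQ_REMOVE(&g_resources, res, link);
		free(res);
	}
}

int
main(void)
{
	int key_a, key_b;
	struct resource *r1 = resource_get(&key_a);
	struct resource *r2 = resource_get(&key_a);  /* same key -> shared */
	struct resource *r3 = resource_get(&key_b);  /* new key -> distinct */

	printf("r1 == r2: %d, r1 == r3: %d\n", r1 == r2, r1 == r3);
	resource_put(r1);
	resource_put(r2);
	resource_put(r3);
	return 0;
}

In the diff below, _spdk_bdev_channel_create() and
_spdk_bdev_channel_destroy_resource() follow the same
get/put shape, with the module's spdk_io_channel pointer
as the key.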

Change-Id: Ice0570f92fcaffa6301a282c53eeec8215f354fe
Reported-by: Ben Walker <benjamin.walker@intel.com>
Signed-off-by: Dariusz Stojaczyk <dariuszx.stojaczyk@intel.com>
Reviewed-on: https://review.gerrithub.io/409996
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>

@@ -138,6 +138,8 @@ struct spdk_bdev_mgmt_channel {
 	 */
 	bdev_io_stailq_t per_thread_cache;
 	uint32_t per_thread_cache_count;
+
+	TAILQ_HEAD(, spdk_bdev_module_channel) module_channels;
 };
 
 /*
@@ -146,7 +148,6 @@ struct spdk_bdev_mgmt_channel {
  * IO to one bdev after IO from other bdev completes.
  */
 struct spdk_bdev_module_channel {
-
 	/* The bdev management channel */
 	struct spdk_bdev_mgmt_channel *mgmt_ch;
@@ -167,6 +168,12 @@ struct spdk_bdev_module_channel {
 	 */
 	uint64_t nomem_threshold;
 
+	/* I/O channel allocated by a bdev module */
+	struct spdk_io_channel *module_ch;
+
+	/* Refcount of bdev channels using this channel */
+	uint32_t ref;
+
+	TAILQ_ENTRY(spdk_bdev_module_channel) link;
 };
@@ -450,6 +457,8 @@ spdk_bdev_mgmt_channel_create(void *io_device, void *ctx_buf)
 	STAILQ_INIT(&ch->per_thread_cache);
 	ch->per_thread_cache_count = 0;
 
+	TAILQ_INIT(&ch->module_channels);
+
 	return 0;
 }
@@ -460,7 +469,11 @@ spdk_bdev_mgmt_channel_destroy(void *io_device, void *ctx_buf)
 	struct spdk_bdev_io *bdev_io;
 
 	if (!STAILQ_EMPTY(&ch->need_buf_small) || !STAILQ_EMPTY(&ch->need_buf_large)) {
-		SPDK_ERRLOG("Pending I/O list wasn't empty on channel free\n");
+		SPDK_ERRLOG("Pending I/O list wasn't empty on mgmt channel free\n");
+	}
+
+	if (!TAILQ_EMPTY(&ch->module_channels)) {
+		SPDK_ERRLOG("Module channel list wasn't empty on mgmt channel free\n");
 	}
 
 	while (!STAILQ_EMPTY(&ch->per_thread_cache)) {
@@ -550,37 +563,6 @@ spdk_bdev_module_examine_done(struct spdk_bdev_module *module)
 	spdk_bdev_module_action_done(module);
 }
 
-static int
-spdk_bdev_module_channel_create(void *io_device, void *ctx_buf)
-{
-	struct spdk_bdev_module_channel *ch = ctx_buf;
-	struct spdk_io_channel *mgmt_ch;
-
-	ch->io_outstanding = 0;
-	TAILQ_INIT(&ch->nomem_io);
-	ch->nomem_threshold = 0;
-
-	mgmt_ch = spdk_get_io_channel(&g_bdev_mgr);
-	if (!mgmt_ch) {
-		return -1;
-	}
-
-	ch->mgmt_ch = spdk_io_channel_get_ctx(mgmt_ch);
-	return 0;
-}
-
-static void
-spdk_bdev_module_channel_destroy(void *io_device, void *ctx_buf)
-{
-	struct spdk_bdev_module_channel *ch = ctx_buf;
-
-	assert(ch->io_outstanding == 0);
-	assert(TAILQ_EMPTY(&ch->nomem_io));
-
-	spdk_put_io_channel(spdk_io_channel_from_ctx(ch->mgmt_ch));
-}
-
 static int
 spdk_bdev_modules_init(void)
 {
@@ -588,10 +570,6 @@ spdk_bdev_modules_init(void)
 	int rc = 0;
 
 	TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, tailq) {
-		spdk_io_device_register(module,
-					spdk_bdev_module_channel_create,
-					spdk_bdev_module_channel_destroy,
-					sizeof(struct spdk_bdev_module_channel));
 		rc = module->module_init();
 		if (rc != 0) {
 			break;
@@ -736,32 +714,33 @@ spdk_bdev_module_finish_iter(void *arg)
 		bdev_module = TAILQ_NEXT(g_resume_bdev_module, tailq);
 	}
 
-	if (bdev_module) {
-		/* Save our place so we can resume later. We must
-		 * save the variable here, before calling module_fini()
-		 * below, because in some cases the module may immediately
-		 * call spdk_bdev_module_finish_done() and re-enter
-		 * this function to continue iterating. */
-		g_resume_bdev_module = bdev_module;
+	while (bdev_module) {
+		if (bdev_module->async_fini) {
+			/* Save our place so we can resume later. We must
+			 * save the variable here, before calling module_fini()
+			 * below, because in some cases the module may immediately
+			 * call spdk_bdev_module_finish_done() and re-enter
+			 * this function to continue iterating. */
+			g_resume_bdev_module = bdev_module;
+		}
+
 		if (bdev_module->module_fini) {
 			bdev_module->module_fini();
 		}
 
-		if (!bdev_module->async_fini) {
-			spdk_bdev_module_finish_done();
+		if (bdev_module->async_fini) {
+			return;
 		}
 
-		return;
+		bdev_module = TAILQ_NEXT(bdev_module, tailq);
 	}
 
 	g_resume_bdev_module = NULL;
 	spdk_io_device_unregister(&g_bdev_mgr, spdk_bdev_mgr_unregister_cb);
 }
 
-static void
-spdk_bdev_module_unregister_cb(void *io_device)
+void
+spdk_bdev_module_finish_done(void)
 {
 	if (spdk_get_thread() != g_fini_thread) {
 		spdk_thread_send_msg(g_fini_thread, spdk_bdev_module_finish_iter, NULL);
@@ -770,12 +749,6 @@ spdk_bdev_module_unregister_cb(void *io_device)
 	}
 }
 
-void
-spdk_bdev_module_finish_done(void)
-{
-	spdk_io_device_unregister(g_resume_bdev_module, spdk_bdev_module_unregister_cb);
-}
-
 static void
 _spdk_bdev_finish_unregister_bdevs_iter(void *cb_arg, int bdeverrno)
 {
@@ -1030,6 +1003,9 @@ static int
 _spdk_bdev_channel_create(struct spdk_bdev_channel *ch, void *io_device)
 {
 	struct spdk_bdev *bdev = __bdev_from_io_dev(io_device);
+	struct spdk_io_channel *mgmt_io_ch;
+	struct spdk_bdev_mgmt_channel *mgmt_ch;
+	struct spdk_bdev_module_channel *module_ch;
 
 	ch->bdev = bdev;
 	ch->channel = bdev->fn_table->get_io_channel(bdev->ctxt);
@@ -1037,12 +1013,41 @@ _spdk_bdev_channel_create(struct spdk_bdev_channel *ch, void *io_device)
 		return -1;
 	}
 
-	ch->module_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(bdev->module));
+	mgmt_io_ch = spdk_get_io_channel(&g_bdev_mgr);
+	if (!mgmt_io_ch) {
+		return -1;
+	}
+
+	mgmt_ch = spdk_io_channel_get_ctx(mgmt_io_ch);
+	TAILQ_FOREACH(module_ch, &mgmt_ch->module_channels, link) {
+		if (module_ch->module_ch == ch->channel) {
+			spdk_put_io_channel(mgmt_io_ch);
+			module_ch->ref++;
+			break;
+		}
+	}
+
+	if (module_ch == NULL) {
+		module_ch = calloc(1, sizeof(*module_ch));
+		if (module_ch == NULL) {
+			spdk_put_io_channel(mgmt_io_ch);
+			return -1;
+		}
+
+		module_ch->mgmt_ch = mgmt_ch;
+		module_ch->io_outstanding = 0;
+		TAILQ_INIT(&module_ch->nomem_io);
+		module_ch->nomem_threshold = 0;
+		module_ch->module_ch = ch->channel;
+		module_ch->ref = 1;
+		TAILQ_INSERT_TAIL(&mgmt_ch->module_channels, module_ch, link);
+	}
 
 	memset(&ch->stat, 0, sizeof(ch->stat));
 	ch->io_outstanding = 0;
 	TAILQ_INIT(&ch->queued_resets);
 	ch->flags = 0;
+	ch->module_ch = module_ch;
 
 	return 0;
 }
@@ -1050,6 +1055,8 @@ _spdk_bdev_channel_create(struct spdk_bdev_channel *ch, void *io_device)
 static void
 _spdk_bdev_channel_destroy_resource(struct spdk_bdev_channel *ch)
 {
+	struct spdk_bdev_module_channel *module_ch;
+
 	if (!ch) {
 		return;
 	}
@@ -1058,8 +1065,18 @@ _spdk_bdev_channel_destroy_resource(struct spdk_bdev_channel *ch)
 		spdk_put_io_channel(ch->channel);
 	}
 
-	if (ch->module_ch) {
-		spdk_put_io_channel(spdk_io_channel_from_ctx(ch->module_ch));
+	assert(ch->io_outstanding == 0);
+
+	module_ch = ch->module_ch;
+	if (module_ch) {
+		assert(module_ch->ref > 0);
+		module_ch->ref--;
+
+		if (module_ch->ref == 0) {
+			assert(module_ch->io_outstanding == 0);
+			spdk_put_io_channel(spdk_io_channel_from_ctx(module_ch->mgmt_ch));
+			TAILQ_REMOVE(&module_ch->mgmt_ch->module_channels, module_ch, link);
+			free(module_ch);
+		}
 	}
 }
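
For context on why the old sharing hurt: the module channel
holds the nomem_io queue, where I/O rejected with ENOMEM
waits, and nothing on that queue is retried until
io_outstanding drains down to nomem_threshold. A condensed,
compilable sketch of that bookkeeping (hypothetical names;
the real retry logic in lib/bdev/bdev.c is more involved):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/queue.h>

/* Hypothetical stand-ins for the fields shown in the diff above
 * (io_outstanding, nomem_io, nomem_threshold). */
struct io;
TAILQ_HEAD(io_tailq, io);

struct shared_channel {
	uint64_t io_outstanding;   /* I/O in flight through the module */
	struct io_tailq nomem_io;  /* I/O rejected with ENOMEM, parked for retry */
	uint64_t nomem_threshold;  /* retry once io_outstanding drains to this */
};

/* Submit side: while anything is parked, new I/O from *every* bdev
 * sharing this channel queues behind it to preserve ordering. This is
 * the cross-bdev coupling the commit removes. */
static bool
must_queue(const struct shared_channel *ch)
{
	return !TAILQ_EMPTY(&ch->nomem_io);
}

/* Completion side: retry parked I/O only once enough in-flight requests
 * have drained that the module is likely to accept them again. */
static bool
should_retry(const struct shared_channel *ch)
{
	return !TAILQ_EMPTY(&ch->nomem_io) &&
	       ch->io_outstanding <= ch->nomem_threshold;
}

int
main(void)
{
	struct shared_channel ch = { .io_outstanding = 8, .nomem_threshold = 4 };

	TAILQ_INIT(&ch.nomem_io);
	printf("queue new I/O: %d, retry parked: %d\n",
	       must_queue(&ch), should_retry(&ch));
	return 0;
}

With one such structure per module, a saturated bdev kept
must_queue() true for every other bdev of that module; with
one per io_device, the backlog stays local to the bdev that
caused it.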