nvmf: Eliminate spdk_nvmf_tgt_accept()

The poller is now created internally by the library whenever a target
is constructed. Applications are no longer expected to poll for
connections.
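
As a minimal illustrative sketch of the new flow (the option values shown are the library defaults introduced by this change; nothing else is required to start accepting connections):

```c
#include <stdio.h>
#include <string.h>

#include "spdk/nvmf.h"

static struct spdk_nvmf_tgt *
example_create_target(void)
{
	struct spdk_nvmf_target_opts opts;
	struct spdk_nvmf_tgt *tgt;

	memset(&opts, 0, sizeof(opts));
	snprintf(opts.name, sizeof(opts.name), "example_tgt");
	opts.max_subsystems = 1024;      /* SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS */
	opts.acceptor_poll_rate = 10000; /* microseconds; 0 selects the default */

	/* The acceptor poller is registered inside spdk_nvmf_tgt_create();
	 * the application no longer drives spdk_nvmf_tgt_accept(). */
	tgt = spdk_nvmf_tgt_create(&opts);
	if (tgt == NULL) {
		fprintf(stderr, "failed to create nvmf target\n");
	}

	return tgt;
}
```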

Change-Id: I523eb6adcc042c1ba2ed41b1cb41256b8bf63772
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/3583
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Ben Walker 2020-06-29 10:56:47 -07:00 committed by Tomasz Zawadzki
parent 4be6d32b97
commit 77ab6f2839
9 changed files with 51 additions and 103 deletions

View File

@ -117,10 +117,10 @@ implementations of the transport interface will have to implement that function.
The NVMe-oF target no longer supports connection scheduling configuration and instead
always uses what was previously called "transport" scheduling.
`spdk_nvmf_tgt_accept` no longer takes a function pointer as an argument. New connections
are automatically assigned to poll groups by the underlying transport. Further,
`spdk_nvmf_transport_ops` has changed such that the accept function pointer no longer
takes a function pointer as an argument. Instead, transports should call
`spdk_nvmf_tgt_accept` no longer exists. The accept process now occurs automatically after
the creation of an NVMe-oF target, and queue pairs are assigned to poll groups by the underlying
transport. Further, `spdk_nvmf_transport_ops` has changed such that the accept function
pointer no longer takes a function pointer as an argument. Instead, transports should call
`spdk_nvmf_tgt_new_qpair` whenever they previously would have called that callback.
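
For illustration, a rough transport-side sketch of that contract (the helper below and the `tgt` back-pointer on the transport are assumptions for the example, not part of this change):

```c
#include "spdk/nvmf.h"
#include "spdk/nvmf_transport.h"

/* Hypothetical transport helper: returns the next pending connection, or NULL. */
struct spdk_nvmf_qpair *example_poll_for_new_connection(struct spdk_nvmf_transport *transport);

static int
example_transport_accept(struct spdk_nvmf_transport *transport)
{
	struct spdk_nvmf_qpair *qpair;
	int count = 0;

	while ((qpair = example_poll_for_new_connection(transport)) != NULL) {
		/* Where the old code invoked the cb_fn passed into accept(), the
		 * qpair is now handed straight to the target, which assigns it
		 * to a poll group internally. */
		spdk_nvmf_tgt_new_qpair(transport->tgt, qpair);
		count++;
	}

	return count;
}
```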
The NVMe-oF target now supports aborting any submitted NVM or Admin command. Previously,

View File

@ -83,7 +83,8 @@ Namespaces are bdevs. See @ref bdev for more information about the SPDK bdev
layer. A bdev may be obtained by calling spdk_bdev_get_by_name().
Once a subsystem exists and the target is listening on an address, new
connections may be accepted by polling spdk_nvmf_tgt_accept().
connections will be automatically assigned to poll groups as they are
detected.
All I/O to a subsystem is driven by a poll group, which polls for incoming
network I/O. Poll groups may be created by calling
@ -91,13 +92,6 @@ spdk_nvmf_poll_group_create(). They automatically request to begin polling
upon creation on the thread from which they were created. Most importantly, *a
poll group may only be accessed from the thread on which it was created.*
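
A minimal sketch of the usual pattern (helper names here are illustrative, not SPDK API): create one poll group per SPDK thread, each on the thread that will own it.

```c
#include <stdio.h>

#include "spdk/nvmf.h"
#include "spdk/thread.h"

static void
pg_create_on_thread(void *ctx)
{
	struct spdk_nvmf_tgt *tgt = ctx;
	struct spdk_nvmf_poll_group *pg;

	/* Runs on the thread that will own and poll the group. */
	pg = spdk_nvmf_poll_group_create(tgt);
	if (pg == NULL) {
		fprintf(stderr, "failed to create poll group\n");
	}
	/* Stash pg in per-thread state; only this thread may touch it. */
}

static void
pg_create_done(void *ctx)
{
	/* Every SPDK thread now owns a poll group. */
}

static void
create_poll_groups(struct spdk_nvmf_tgt *tgt)
{
	/* Executes pg_create_on_thread() once on each SPDK thread. */
	spdk_for_each_thread(pg_create_on_thread, tgt, pg_create_done);
}
```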
When spdk_nvmf_tgt_accept() detects a new connection, it chooses an optimal
poll group by first calling spdk_nvmf_get_optimal_poll_group(), which calls down
into the transport, and then assigns the qpair to the optimal poll group by
calling spdk_nvmf_poll_group_add(). This all happens within the NVMe-oF target
library and the NVMe-oF target application is not required to do anything other
than continue to periodically poll spdk_nvmf_tgt_accept().
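
That optimal-group selection is still what the library performs internally when a transport hands over a new qpair. A conceptual sketch (simplified; the real code forwards the qpair to the chosen group's thread with spdk_thread_send_msg() before adding it):

```c
#include "spdk/nvmf.h"

static void
example_assign_qpair(struct spdk_nvmf_qpair *qpair)
{
	struct spdk_nvmf_poll_group *group;

	/* Let the transport pick the best-suited poll group for this qpair. */
	group = spdk_nvmf_get_optimal_poll_group(qpair);
	if (group == NULL) {
		return; /* no poll group available; the connection would be rejected */
	}

	/* Must execute on the thread that owns 'group'. */
	spdk_nvmf_poll_group_add(group, qpair);
}
```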
## Access Control
Access control is performed at the subsystem level by adding allowed listen
@ -110,9 +104,7 @@ and hosts may only be added to inactive or paused subsystems.
A discovery subsystem, as defined by the NVMe-oF specification, is
automatically created for each NVMe-oF target constructed. Connections to the
discovery subsystem are handled in the same way as any other subsystem - new
qpairs are created in response to spdk_nvmf_tgt_accept() and they must be
assigned to a poll group.
discovery subsystem are handled in the same way as any other subsystem.
## Transports
@ -131,15 +123,7 @@ fabrics simultaneously.
The SPDK NVMe-oF target library does not strictly dictate threading model, but
poll groups do all of their polling and I/O processing on the thread they are
created on. Given that, it almost always makes sense to create one poll group
per thread used in the application. New qpairs created in response to
spdk_nvmf_tgt_accept() can be handed out round-robin to the poll groups. This
is how the SPDK NVMe-oF target application currently functions.
More advanced algorithms for distributing qpairs to poll groups are possible.
For instance, a NUMA-aware algorithm would be an improvement over basic
round-robin, where NUMA-aware means assigning qpairs to poll groups running on
CPU cores that are on the same NUMA node as the network adapter and storage
device. Load-aware algorithms also may have benefits.
per thread used in the application.
## Scaling Across CPU Cores

View File

@ -54,11 +54,9 @@ enum nvmf_target_state {
NVMF_INIT_TARGET,
NVMF_INIT_POLL_GROUPS,
NVMF_INIT_START_SUBSYSTEMS,
NVMF_INIT_START_ACCEPTOR,
NVMF_RUNNING,
NVMF_FINI_STOP_SUBSYSTEMS,
NVMF_FINI_POLL_GROUPS,
NVMF_FINI_STOP_ACCEPTOR,
NVMF_FINI_TARGET,
NVMF_FINI_SUBSYSTEM,
};
@ -99,7 +97,7 @@ static struct spdk_thread *g_fini_thread = NULL;
static struct nvmf_target g_nvmf_tgt = {
.max_subsystems = NVMF_DEFAULT_SUBSYSTEMS,
};
static struct spdk_poller *g_acceptor_poller = NULL;
static struct nvmf_target_poll_group *g_next_pg = NULL;
static pthread_mutex_t g_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool g_reactors_exit = false;
@ -528,16 +526,6 @@ nvmf_tgt_stop_subsystems(struct nvmf_target *nvmf_tgt)
}
}
static int
nvmf_tgt_acceptor_poll(void *arg)
{
struct nvmf_target *nvmf_tgt = arg;
spdk_nvmf_tgt_accept(nvmf_tgt->tgt);
return -1;
}
static void
nvmf_tgt_subsystem_start_next(struct spdk_nvmf_subsystem *subsystem,
void *cb_arg, int status)
@ -558,7 +546,7 @@ nvmf_tgt_subsystem_start_next(struct spdk_nvmf_subsystem *subsystem,
fprintf(stdout, "all subsystems of target started\n");
g_target_state = NVMF_INIT_START_ACCEPTOR;
g_target_state = NVMF_RUNNING;
nvmf_target_advance_state();
}
@ -587,7 +575,7 @@ nvmf_tgt_start_subsystems(struct nvmf_target *nvmf_tgt)
g_target_state = NVMF_FINI_STOP_SUBSYSTEMS;
}
} else {
g_target_state = NVMF_INIT_START_ACCEPTOR;
g_target_state = NVMF_RUNNING;
}
}
@ -670,7 +658,7 @@ _nvmf_tgt_destroy_poll_groups_done(void *ctx)
if (--g_num_poll_groups == 0) {
fprintf(stdout, "destroy targets's poll groups done\n");
g_target_state = NVMF_FINI_STOP_ACCEPTOR;
g_target_state = NVMF_FINI_TARGET;
nvmf_target_advance_state();
}
}
@ -779,12 +767,6 @@ nvmf_target_advance_state(void)
case NVMF_INIT_START_SUBSYSTEMS:
nvmf_tgt_start_subsystems(&g_nvmf_tgt);
break;
case NVMF_INIT_START_ACCEPTOR:
g_acceptor_poller = SPDK_POLLER_REGISTER(nvmf_tgt_acceptor_poll, &g_nvmf_tgt,
g_acceptor_poll_rate);
fprintf(stdout, "Acceptor running\n");
g_target_state = NVMF_RUNNING;
break;
case NVMF_RUNNING:
fprintf(stdout, "nvmf target is running\n");
if (g_migrate_pg_period_us != 0) {
@ -799,10 +781,6 @@ nvmf_target_advance_state(void)
case NVMF_FINI_POLL_GROUPS:
nvmf_poll_groups_destroy();
break;
case NVMF_FINI_STOP_ACCEPTOR:
spdk_poller_unregister(&g_acceptor_poller);
g_target_state = NVMF_FINI_TARGET;
break;
case NVMF_FINI_TARGET:
nvmf_destroy_nvmf_tgt();
break;

View File

@ -68,6 +68,7 @@ struct spdk_nvmf_transport;
struct spdk_nvmf_target_opts {
char name[NVMF_TGT_NAME_MAX_LENGTH];
uint32_t max_subsystems;
uint32_t acceptor_poll_rate;
};
struct spdk_nvmf_transport_opts {
@ -228,13 +229,6 @@ int spdk_nvmf_tgt_listen(struct spdk_nvmf_tgt *tgt,
int spdk_nvmf_tgt_stop_listen(struct spdk_nvmf_tgt *tgt,
struct spdk_nvme_transport_id *trid);
/**
* Poll the target for incoming connections.
*
* \param tgt The target associated with the listen address.
*/
uint32_t spdk_nvmf_tgt_accept(struct spdk_nvmf_tgt *tgt);
/**
* Create a poll group.
*

View File

@ -50,6 +50,7 @@
SPDK_LOG_REGISTER_COMPONENT("nvmf", SPDK_LOG_NVMF)
#define SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS 1024
#define SPDK_NVMF_DEFAULT_ACCEPT_POLL_RATE_US 10000
static TAILQ_HEAD(, spdk_nvmf_tgt) g_nvmf_tgts = TAILQ_HEAD_INITIALIZER(g_nvmf_tgts);
@ -231,10 +232,25 @@ nvmf_tgt_destroy_poll_group_qpairs(struct spdk_nvmf_poll_group *group)
_nvmf_tgt_disconnect_next_qpair(ctx);
}
static int
nvmf_tgt_accept(void *ctx)
{
struct spdk_nvmf_tgt *tgt = ctx;
struct spdk_nvmf_transport *transport, *tmp;
int count = 0;
TAILQ_FOREACH_SAFE(transport, &tgt->transports, link, tmp) {
count += nvmf_transport_accept(transport);
}
return count;
}
struct spdk_nvmf_tgt *
spdk_nvmf_tgt_create(struct spdk_nvmf_target_opts *opts)
{
struct spdk_nvmf_tgt *tgt, *tmp_tgt;
uint32_t acceptor_poll_rate;
if (strnlen(opts->name, NVMF_TGT_NAME_MAX_LENGTH) == NVMF_TGT_NAME_MAX_LENGTH) {
SPDK_ERRLOG("Provided target name exceeds the max length of %u.\n", NVMF_TGT_NAME_MAX_LENGTH);
@ -261,6 +277,12 @@ spdk_nvmf_tgt_create(struct spdk_nvmf_target_opts *opts)
tgt->max_subsystems = opts->max_subsystems;
}
if (!opts || !opts->acceptor_poll_rate) {
acceptor_poll_rate = SPDK_NVMF_DEFAULT_ACCEPT_POLL_RATE_US;
} else {
acceptor_poll_rate = opts->acceptor_poll_rate;
}
tgt->discovery_genctr = 0;
TAILQ_INIT(&tgt->transports);
TAILQ_INIT(&tgt->poll_groups);
@ -273,7 +295,12 @@ spdk_nvmf_tgt_create(struct spdk_nvmf_target_opts *opts)
pthread_mutex_init(&tgt->mutex, NULL);
TAILQ_INSERT_HEAD(&g_nvmf_tgts, tgt, link);
tgt->accept_poller = SPDK_POLLER_REGISTER(nvmf_tgt_accept, tgt, acceptor_poll_rate);
if (!tgt->accept_poller) {
free(tgt->subsystems);
free(tgt);
return NULL;
}
spdk_io_device_register(tgt,
nvmf_tgt_create_poll_group,
@ -281,6 +308,8 @@ spdk_nvmf_tgt_create(struct spdk_nvmf_target_opts *opts)
sizeof(struct spdk_nvmf_poll_group),
tgt->name);
TAILQ_INSERT_HEAD(&g_nvmf_tgts, tgt, link);
return tgt;
}
@ -326,6 +355,8 @@ spdk_nvmf_tgt_destroy(struct spdk_nvmf_tgt *tgt,
tgt->destroy_cb_fn = cb_fn;
tgt->destroy_cb_arg = cb_arg;
spdk_poller_unregister(&tgt->accept_poller);
TAILQ_REMOVE(&g_nvmf_tgts, tgt, link);
spdk_io_device_unregister(tgt, nvmf_tgt_destroy_cb);
@ -774,19 +805,6 @@ spdk_nvmf_tgt_new_qpair(struct spdk_nvmf_tgt *tgt, struct spdk_nvmf_qpair *qpair
spdk_thread_send_msg(group->thread, _nvmf_poll_group_add, ctx);
}
uint32_t
spdk_nvmf_tgt_accept(struct spdk_nvmf_tgt *tgt)
{
struct spdk_nvmf_transport *transport, *tmp;
uint32_t count = 0;
TAILQ_FOREACH_SAFE(transport, &tgt->transports, link, tmp) {
count += nvmf_transport_accept(transport);
}
return count;
}
struct spdk_nvmf_poll_group *
spdk_nvmf_poll_group_create(struct spdk_nvmf_tgt *tgt)
{

View File

@ -67,6 +67,8 @@ struct spdk_nvmf_tgt {
uint64_t discovery_genctr;
struct spdk_poller *accept_poller;
uint32_t max_subsystems;
/* Array of subsystem pointers of size max_subsystems indexed by sid */

View File

@ -11,7 +11,6 @@
spdk_nvmf_tgt_write_config_json;
spdk_nvmf_tgt_listen;
spdk_nvmf_tgt_stop_listen;
spdk_nvmf_tgt_accept;
spdk_nvmf_poll_group_create;
spdk_nvmf_get_optimal_poll_group;
spdk_nvmf_poll_group_destroy;

View File

@ -199,6 +199,7 @@ nvmf_parse_nvmf_tgt(void)
}
opts.max_subsystems = g_spdk_nvmf_tgt_max_subsystems;
opts.acceptor_poll_rate = g_spdk_nvmf_tgt_conf->acceptor_poll_rate;
g_spdk_nvmf_tgt = spdk_nvmf_tgt_create(&opts);
g_spdk_nvmf_tgt_max_subsystems = 0;

View File

@ -46,11 +46,9 @@ enum nvmf_tgt_state {
NVMF_TGT_INIT_PARSE_CONFIG,
NVMF_TGT_INIT_CREATE_POLL_GROUPS,
NVMF_TGT_INIT_START_SUBSYSTEMS,
NVMF_TGT_INIT_START_ACCEPTOR,
NVMF_TGT_RUNNING,
NVMF_TGT_FINI_STOP_SUBSYSTEMS,
NVMF_TGT_FINI_DESTROY_POLL_GROUPS,
NVMF_TGT_FINI_STOP_ACCEPTOR,
NVMF_TGT_FINI_FREE_RESOURCES,
NVMF_TGT_STOPPED,
NVMF_TGT_ERROR,
@ -72,8 +70,6 @@ static struct spdk_thread *g_tgt_fini_thread = NULL;
static TAILQ_HEAD(, nvmf_tgt_poll_group) g_poll_groups = TAILQ_HEAD_INITIALIZER(g_poll_groups);
static size_t g_num_poll_groups = 0;
static struct spdk_poller *g_acceptor_poller = NULL;
static void nvmf_tgt_advance_state(void);
static void
@ -103,28 +99,13 @@ nvmf_subsystem_fini(void)
nvmf_shutdown_cb(NULL);
}
static int
acceptor_poll(void *arg)
{
struct spdk_nvmf_tgt *tgt = arg;
uint32_t count;
count = spdk_nvmf_tgt_accept(tgt);
if (count > 0) {
return SPDK_POLLER_BUSY;
} else {
return SPDK_POLLER_IDLE;
}
}
static void
_nvmf_tgt_destroy_poll_group_done(void *ctx)
{
assert(g_num_poll_groups > 0);
if (--g_num_poll_groups == 0) {
g_tgt_state = NVMF_TGT_FINI_STOP_ACCEPTOR;
g_tgt_state = NVMF_TGT_FINI_FREE_RESOURCES;
nvmf_tgt_advance_state();
}
}
@ -236,7 +217,7 @@ nvmf_tgt_subsystem_started(struct spdk_nvmf_subsystem *subsystem,
return;
}
g_tgt_state = NVMF_TGT_INIT_START_ACCEPTOR;
g_tgt_state = NVMF_TGT_RUNNING;
nvmf_tgt_advance_state();
}
@ -406,15 +387,10 @@ nvmf_tgt_advance_state(void)
g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
}
} else {
g_tgt_state = NVMF_TGT_INIT_START_ACCEPTOR;
g_tgt_state = NVMF_TGT_RUNNING;
}
break;
}
case NVMF_TGT_INIT_START_ACCEPTOR:
g_acceptor_poller = SPDK_POLLER_REGISTER(acceptor_poll, g_spdk_nvmf_tgt,
g_spdk_nvmf_tgt_conf->acceptor_poll_rate);
g_tgt_state = NVMF_TGT_RUNNING;
break;
case NVMF_TGT_RUNNING:
spdk_subsystem_init_next(0);
break;
@ -437,10 +413,6 @@ nvmf_tgt_advance_state(void)
/* Send a message to each poll group thread, and terminate the thread */
nvmf_tgt_destroy_poll_groups();
break;
case NVMF_TGT_FINI_STOP_ACCEPTOR:
spdk_poller_unregister(&g_acceptor_poller);
g_tgt_state = NVMF_TGT_FINI_FREE_RESOURCES;
break;
case NVMF_TGT_FINI_FREE_RESOURCES:
spdk_nvmf_tgt_destroy(g_spdk_nvmf_tgt, nvmf_tgt_destroy_done, NULL);
break;