From 750a4213ef7f03472dd1d460a62fdd63303be25e Mon Sep 17 00:00:00 2001 From: Ziye Yang Date: Wed, 15 May 2019 20:40:12 +0800 Subject: [PATCH] nvmf: add spdk_nvmf_get_optimal_poll_group This patch does the following work: 1. It is optimized for the NVMe/TCP transport. If the sockets of several qpairs have the same NAPI_ID, those qpairs will be handled by the same polling group. 2. We add a new connection scheduling strategy option, named ConnectionScheduler, in the configuration file. It is used to select a different scheduler according to the user's input. Signed-off-by: Ziye Yang Change-Id: Ifc9246eece0da69bdd39fd63bfdefff18be64132 Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/454550 Tested-by: SPDK CI Jenkins Reviewed-by: Ben Walker Reviewed-by: Shuhei Matsumoto Reviewed-by: Changpeng Liu --- CHANGELOG.md | 5 ++++ etc/spdk/nvmf.conf.in | 9 +++++++ include/spdk/nvmf.h | 9 +++++++ lib/event/subsystems/nvmf/conf.c | 35 ++++++++++++++++++++++++-- lib/event/subsystems/nvmf/event_nvmf.h | 1 + lib/event/subsystems/nvmf/nvmf_tgt.c | 27 ++++++++++++++++++++ lib/nvmf/nvmf.c | 16 ++++++++++++ lib/nvmf/nvmf_internal.h | 1 + 8 files changed, 101 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a773a1ced8..43d5e23506 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,11 @@ Shared receive queue can now be disabled even for NICs that support it using the `nvmf_create_transport` RPC method parameter `no_srq`. The actual use of a shared receive queue is predicated on hardware support when this flag is not used. +`spdk_nvmf_get_optimal_poll_group()` was added, which returns the optimal +poll group for the qpair. A `ConnectionScheduler` configuration option was also added to the +[Nvmf] section in etc/spdk/nvmf.conf.in to demonstrate how to configure the connection +scheduling strategy among different SPDK threads. 
+ ### notify The function `spdk_notify_get_types()` and `spdk_notify_get_events()` were diff --git a/etc/spdk/nvmf.conf.in b/etc/spdk/nvmf.conf.in index e7d6ceac18..e4f0873389 100644 --- a/etc/spdk/nvmf.conf.in +++ b/etc/spdk/nvmf.conf.in @@ -65,6 +65,15 @@ # poll. Units in microseconds. AcceptorPollRate 10000 + # Set how connections are scheduled among multiple threads. Currently supported string values are + # "RoundRobin", "Host", "Transport". + # RoundRobin: Schedule the connection in a round-robin manner. + # Host: Schedule the connection according to the host IP. + # Transport: Schedule the connection according to the transport characteristics. + # For example, for the TCP transport, we can schedule the connection according to socket NAPI_ID info. + # Connections which have the same socket NAPI_ID info will be grouped in the same polling group. + ConnectionScheduler RoundRobin + # One valid transport type must be set in each [Transport]. # The first is the case of RDMA transport and the second is the case of TCP transport. [Transport] diff --git a/include/spdk/nvmf.h b/include/spdk/nvmf.h index 4b3026c1f1..d1ca9d917c 100644 --- a/include/spdk/nvmf.h +++ b/include/spdk/nvmf.h @@ -163,6 +163,15 @@ void spdk_nvmf_tgt_accept(struct spdk_nvmf_tgt *tgt, new_qpair_fn cb_fn); */ struct spdk_nvmf_poll_group *spdk_nvmf_poll_group_create(struct spdk_nvmf_tgt *tgt); +/** + * Get optimal nvmf poll group for the qpair. + * + * \param qpair Requested qpair + * + * \return a poll group on success, or NULL on failure. + */ +struct spdk_nvmf_poll_group *spdk_nvmf_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair); + /** * Destroy a poll group. 
* diff --git a/lib/event/subsystems/nvmf/conf.c b/lib/event/subsystems/nvmf/conf.c index ac26f87e84..6534f5c3f6 100644 --- a/lib/event/subsystems/nvmf/conf.c +++ b/lib/event/subsystems/nvmf/conf.c @@ -101,16 +101,42 @@ spdk_nvmf_read_config_file_tgt_max_subsystems(struct spdk_conf_section *sp, } } -static void +static int spdk_nvmf_read_config_file_tgt_conf(struct spdk_conf_section *sp, struct spdk_nvmf_tgt_conf *conf) { int acceptor_poll_rate; + const char *conn_scheduler; + int rc = 0; acceptor_poll_rate = spdk_conf_section_get_intval(sp, "AcceptorPollRate"); if (acceptor_poll_rate >= 0) { conf->acceptor_poll_rate = acceptor_poll_rate; } + + conn_scheduler = spdk_conf_section_get_val(sp, "ConnectionScheduler"); + + if (conn_scheduler) { + if (strcasecmp(conn_scheduler, "RoundRobin") == 0) { + conf->conn_sched = CONNECT_SCHED_ROUND_ROBIN; + } else if (strcasecmp(conn_scheduler, "Host") == 0) { + conf->conn_sched = CONNECT_SCHED_HOST_IP; + } else if (strcasecmp(conn_scheduler, "Transport") == 0) { + conf->conn_sched = CONNECT_SCHED_TRANSPORT_OPTIMAL_GROUP; + } else { + SPDK_ERRLOG("The valid value of ConnectionScheduler should be:\n" + "\t RoundRobin\n" + "\t Host\n" + "\t Transport\n"); + rc = -1; + } + + } else { + SPDK_NOTICELOG("The value of ConnectionScheduler is not configured,\n" + "we will use RoundRobin as the default scheduler\n"); + } + + return rc; } static int @@ -132,6 +158,7 @@ spdk_nvmf_parse_tgt_conf(void) { struct spdk_nvmf_tgt_conf *conf; struct spdk_conf_section *sp; + int rc; conf = calloc(1, sizeof(*conf)); if (!conf) { @@ -144,7 +171,11 @@ spdk_nvmf_parse_tgt_conf(void) sp = spdk_conf_find_section(NULL, "Nvmf"); if (sp != NULL) { - spdk_nvmf_read_config_file_tgt_conf(sp, conf); + rc = spdk_nvmf_read_config_file_tgt_conf(sp, conf); + if (rc) { + free(conf); + return NULL; + } } return conf; diff --git a/lib/event/subsystems/nvmf/event_nvmf.h b/lib/event/subsystems/nvmf/event_nvmf.h index 6c45cbeb18..bb2c5c6694 100644 --- 
a/lib/event/subsystems/nvmf/event_nvmf.h +++ b/lib/event/subsystems/nvmf/event_nvmf.h @@ -48,6 +48,7 @@ enum spdk_nvmf_connect_sched { CONNECT_SCHED_ROUND_ROBIN = 0, CONNECT_SCHED_HOST_IP, + CONNECT_SCHED_TRANSPORT_OPTIMAL_GROUP, }; struct spdk_nvmf_tgt_conf { diff --git a/lib/event/subsystems/nvmf/nvmf_tgt.c b/lib/event/subsystems/nvmf/nvmf_tgt.c index f1e9cebb43..f3964920ec 100644 --- a/lib/event/subsystems/nvmf/nvmf_tgt.c +++ b/lib/event/subsystems/nvmf/nvmf_tgt.c @@ -123,6 +123,30 @@ spdk_nvmf_get_next_pg(void) return pg; } +static struct nvmf_tgt_poll_group * +spdk_nvmf_get_optimal_pg(struct spdk_nvmf_qpair *qpair) +{ + struct nvmf_tgt_poll_group *pg, *_pg = NULL; + struct spdk_nvmf_poll_group *group = spdk_nvmf_get_optimal_poll_group(qpair); + + if (group == NULL) { + _pg = spdk_nvmf_get_next_pg(); + goto end; + } + + TAILQ_FOREACH(pg, &g_poll_groups, link) { + if (pg->group == group) { + _pg = pg; + break; + } + + } + +end: + assert(_pg != NULL); + return _pg; +} + static void nvmf_tgt_remove_host_trid(struct spdk_nvmf_qpair *qpair) { @@ -193,6 +217,9 @@ nvmf_tgt_get_pg(struct spdk_nvmf_qpair *qpair) TAILQ_INSERT_TAIL(&g_nvmf_tgt_host_trids, new_trid, link); } break; + case CONNECT_SCHED_TRANSPORT_OPTIMAL_GROUP: + pg = spdk_nvmf_get_optimal_pg(qpair); + break; case CONNECT_SCHED_ROUND_ROBIN: default: pg = spdk_nvmf_get_next_pg(); diff --git a/lib/nvmf/nvmf.c b/lib/nvmf/nvmf.c index cecb47d4ef..7762cc9e91 100644 --- a/lib/nvmf/nvmf.c +++ b/lib/nvmf/nvmf.c @@ -855,6 +855,7 @@ spdk_nvmf_poll_group_add_transport(struct spdk_nvmf_poll_group *group, return -1; } + tgroup->group = group; TAILQ_INSERT_TAIL(&group->tgroups, tgroup, link); return 0; @@ -1211,3 +1212,18 @@ fini: cb_fn(cb_arg, rc); } } + + +struct spdk_nvmf_poll_group * +spdk_nvmf_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_transport_poll_group *tgroup; + + tgroup = spdk_nvmf_transport_get_optimal_poll_group(qpair->transport, qpair); + + if (tgroup == NULL) { + return 
NULL; + } + + return tgroup->group; +} diff --git a/lib/nvmf/nvmf_internal.h b/lib/nvmf/nvmf_internal.h index 4d075fac49..96d978c46a 100644 --- a/lib/nvmf/nvmf_internal.h +++ b/lib/nvmf/nvmf_internal.h @@ -109,6 +109,7 @@ struct spdk_nvmf_transport_poll_group { STAILQ_HEAD(, spdk_nvmf_transport_pg_cache_buf) buf_cache; uint32_t buf_cache_count; uint32_t buf_cache_size; + struct spdk_nvmf_poll_group *group; TAILQ_ENTRY(spdk_nvmf_transport_poll_group) link; };