nvmf: Add IP based load balancing

This patch adds support for IP-based load balancing: all RDMA connections coming from a particular host IP are assigned to the same core on the target. The load-balancing policy (roundrobin or hostip) can be configured through RPC at startup only; roundrobin is the default policy.

Change-Id: I7bb216cb4cb2639fa401e23090143c4cf1d7bb5a
Signed-off-by: Avinash M N <avinash.m.n@wdc.com>
Reviewed-on: https://review.gerrithub.io/422190
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Chandler-Test-Pool: SPDK Automated Test System <sys_sgsw@intel.com>
This commit is contained in:
parent 94f87a2dbb
commit 1f3f160581
@@ -146,6 +146,7 @@ spdk_nvmf_parse_tgt_conf(void)
 	}
 
 	conf->acceptor_poll_rate = ACCEPT_TIMEOUT_US;
+	conf->conn_sched = DEFAULT_CONN_SCHED;
 
 	sp = spdk_conf_find_section(NULL, "Nvmf");
 	if (sp != NULL) {
@@ -43,9 +43,16 @@
 #include "spdk_internal/log.h"
 
 #define ACCEPT_TIMEOUT_US 10000 /* 10ms */
+#define DEFAULT_CONN_SCHED CONNECT_SCHED_ROUND_ROBIN
+
+enum spdk_nvmf_connect_sched {
+	CONNECT_SCHED_ROUND_ROBIN = 0,
+	CONNECT_SCHED_HOST_IP,
+};
 
 struct spdk_nvmf_tgt_conf {
 	uint32_t acceptor_poll_rate;
+	enum spdk_nvmf_connect_sched conn_sched;
 };
 
 extern struct spdk_nvmf_tgt_opts *g_spdk_nvmf_tgt_opts;
@@ -1618,8 +1618,25 @@ nvmf_rpc_subsystem_set_tgt_opts(struct spdk_jsonrpc_request *request,
 }
 SPDK_RPC_REGISTER("set_nvmf_target_options", nvmf_rpc_subsystem_set_tgt_opts, SPDK_RPC_STARTUP)
 
+static int decode_conn_sched(const struct spdk_json_val *val, void *out)
+{
+	enum spdk_nvmf_connect_sched *sched = out;
+
+	if (spdk_json_strequal(val, "roundrobin") == true) {
+		*sched = CONNECT_SCHED_ROUND_ROBIN;
+	} else if (spdk_json_strequal(val, "hostip") == true) {
+		*sched = CONNECT_SCHED_HOST_IP;
+	} else {
+		SPDK_ERRLOG("Invalid connection scheduling parameter\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static const struct spdk_json_object_decoder nvmf_rpc_subsystem_tgt_conf_decoder[] = {
 	{"acceptor_poll_rate", offsetof(struct spdk_nvmf_tgt_conf, acceptor_poll_rate), spdk_json_decode_uint32, true},
+	{"conn_sched", offsetof(struct spdk_nvmf_tgt_conf, conn_sched), decode_conn_sched, true},
 };
 
 static void
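For reference, a minimal sketch (not part of the patch) of the JSON-RPC request this decoder handles. The field names come from nvmf_rpc_subsystem_tgt_conf_decoder above, both keys are optional, and any conn_sched value other than "roundrobin" or "hostip" is rejected with -EINVAL:

```python
# Illustrative set_nvmf_target_config request body (assumed JSON-RPC
# envelope; the params keys are taken from the decoder table above).
request = {
    "jsonrpc": "2.0",
    "id": 1,
    "method": "set_nvmf_target_config",
    "params": {
        "acceptor_poll_rate": 10000,  # acceptor poll period in microseconds
        "conn_sched": "hostip",       # or "roundrobin" (the default)
    },
}
```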
@@ -1645,6 +1662,7 @@ nvmf_rpc_subsystem_set_tgt_conf(struct spdk_jsonrpc_request *request,
 	}
 
 	conf->acceptor_poll_rate = ACCEPT_TIMEOUT_US;
+	conf->conn_sched = DEFAULT_CONN_SCHED;
 
 	if (params != NULL) {
 		if (spdk_json_decode_object(params, nvmf_rpc_subsystem_tgt_conf_decoder,
@@ -59,11 +59,23 @@ struct nvmf_tgt_poll_group {
 	struct spdk_nvmf_poll_group *group;
 };
 
+struct nvmf_tgt_host_trid {
+	struct spdk_nvme_transport_id host_trid;
+	uint32_t core;
+	uint32_t ref;
+	TAILQ_ENTRY(nvmf_tgt_host_trid) link;
+};
+
+/* List of host trids that are connected to the target */
+static TAILQ_HEAD(, nvmf_tgt_host_trid) g_nvmf_tgt_host_trids =
+	TAILQ_HEAD_INITIALIZER(g_nvmf_tgt_host_trids);
+
 struct spdk_nvmf_tgt *g_spdk_nvmf_tgt = NULL;
 
 static enum nvmf_tgt_state g_tgt_state;
 
-static uint32_t g_tgt_core; /* Round-robin tracking of cores for qpair assignment */
+/* Round-Robin/IP-based tracking of cores for qpair assignment */
+static uint32_t g_tgt_core;
 
 static struct nvmf_tgt_poll_group *g_poll_groups = NULL;
 static size_t g_num_poll_groups = 0;
@@ -111,6 +123,68 @@ nvmf_tgt_poll_group_add(void *arg1, void *arg2)
 	spdk_nvmf_poll_group_add(pg->group, qpair);
 }
 
+/* Round robin selection of cores */
+static uint32_t
+spdk_nvmf_get_core_rr(void)
+{
+	uint32_t core;
+
+	core = g_tgt_core;
+	g_tgt_core = spdk_env_get_next_core(core);
+	if (g_tgt_core == UINT32_MAX) {
+		g_tgt_core = spdk_env_get_first_core();
+	}
+
+	return core;
+}
+
+static uint32_t
+nvmf_tgt_get_qpair_core(struct spdk_nvmf_qpair *qpair)
+{
+	struct spdk_nvme_transport_id trid;
+	struct nvmf_tgt_host_trid *tmp_trid = NULL, *new_trid = NULL;
+	int ret;
+	uint32_t core = 0;
+
+	switch (g_spdk_nvmf_tgt_conf->conn_sched) {
+	case CONNECT_SCHED_HOST_IP:
+		ret = spdk_nvmf_qpair_get_peer_trid(qpair, &trid);
+		if (ret) {
+			SPDK_ERRLOG("Invalid host transport Id. Assigning to core %d\n", core);
+			break;
+		}
+
+		TAILQ_FOREACH(tmp_trid, &g_nvmf_tgt_host_trids, link) {
+			if (tmp_trid && !strncmp(tmp_trid->host_trid.traddr,
+						 trid.traddr, SPDK_NVMF_TRADDR_MAX_LEN + 1)) {
+				tmp_trid->ref++;
+				core = tmp_trid->core;
+				break;
+			}
+		}
+		if (!tmp_trid) {
+			new_trid = calloc(1, sizeof(*new_trid));
+			if (!new_trid) {
+				SPDK_ERRLOG("Insufficient memory. Assigning to core %d\n", core);
+				break;
+			}
+			/* Get the next available core for the new host */
+			core = spdk_nvmf_get_core_rr();
+			new_trid->core = core;
+			memcpy(new_trid->host_trid.traddr, trid.traddr,
+			       SPDK_NVMF_TRADDR_MAX_LEN + 1);
+			TAILQ_INSERT_TAIL(&g_nvmf_tgt_host_trids, new_trid, link);
+		}
+		break;
+	case CONNECT_SCHED_ROUND_ROBIN:
+	default:
+		core = spdk_nvmf_get_core_rr();
+		break;
+	}
+
+	return core;
+}
+
 static void
 new_qpair(struct spdk_nvmf_qpair *qpair)
 {
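In short, nvmf_tgt_get_qpair_core() gives each host address a sticky core assignment: the first connection from a host is placed round-robin, and every later connection from the same traddr reuses that core. A minimal Python model of the policy (illustrative only; all names here are invented for the sketch):

```python
# Toy model of the connection scheduling policy above (not SPDK code).
class ConnSched:
    def __init__(self, cores, policy="roundrobin"):
        self.cores = cores        # available reactor cores
        self.next_idx = 0         # round-robin cursor (g_tgt_core analogue)
        self.policy = policy
        self.host_to_core = {}    # sticky traddr -> core (g_nvmf_tgt_host_trids analogue)

    def _round_robin(self):
        core = self.cores[self.next_idx]
        # Wrap around, like falling back to spdk_env_get_first_core().
        self.next_idx = (self.next_idx + 1) % len(self.cores)
        return core

    def assign(self, traddr):
        if self.policy == "hostip":
            # All connections from one host IP land on the same core.
            if traddr not in self.host_to_core:
                self.host_to_core[traddr] = self._round_robin()
            return self.host_to_core[traddr]
        return self._round_robin()

sched = ConnSched(cores=[0, 1, 2, 3], policy="hostip")
assert sched.assign("192.168.1.10") == sched.assign("192.168.1.10")  # sticky per host
assert sched.assign("192.168.1.11") != sched.assign("192.168.1.10")  # new host, next core
```

One design note visible in the C code: entries carry a ref count but are only freed in bulk when the target is destroyed, so a host's core assignment persists for the life of the target.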
@@ -123,11 +197,7 @@ new_qpair(struct spdk_nvmf_qpair *qpair)
 		return;
 	}
 
-	core = g_tgt_core;
-	g_tgt_core = spdk_env_get_next_core(core);
-	if (g_tgt_core == UINT32_MAX) {
-		g_tgt_core = spdk_env_get_first_core();
-	}
+	core = nvmf_tgt_get_qpair_core(qpair);
 
 	pg = &g_poll_groups[core];
 	assert(pg != NULL);
@@ -222,7 +292,15 @@ nvmf_tgt_subsystem_stopped(struct spdk_nvmf_subsystem *subsystem,
 static void
 nvmf_tgt_destroy_done(void *ctx, int status)
 {
+	struct nvmf_tgt_host_trid *trid, *tmp_trid;
+
 	g_tgt_state = NVMF_TGT_STOPPED;
+
+	TAILQ_FOREACH_SAFE(trid, &g_nvmf_tgt_host_trids, link, tmp_trid) {
+		TAILQ_REMOVE(&g_nvmf_tgt_host_trids, trid, link);
+		free(trid);
+	}
 
 	free(g_spdk_nvmf_tgt_conf);
 	nvmf_tgt_advance_state();
 }
@@ -334,6 +412,16 @@ spdk_nvmf_subsystem_init(void)
 	nvmf_tgt_advance_state();
 }
 
+static char *
+get_conn_sched_string(enum spdk_nvmf_connect_sched sched)
+{
+	if (sched == CONNECT_SCHED_HOST_IP) {
+		return "hostip";
+	} else {
+		return "roundrobin";
+	}
+}
+
 static void
 spdk_nvmf_subsystem_write_config_json(struct spdk_json_write_ctx *w, struct spdk_event *done_ev)
 {
@@ -344,6 +432,8 @@ spdk_nvmf_subsystem_write_config_json(struct spdk_json_write_ctx *w, struct spdk_event *done_ev)
 
 	spdk_json_write_named_object_begin(w, "params");
 	spdk_json_write_named_uint32(w, "acceptor_poll_rate", g_spdk_nvmf_tgt_conf->acceptor_poll_rate);
+	spdk_json_write_named_string(w, "conn_sched",
+				     get_conn_sched_string(g_spdk_nvmf_tgt_conf->conn_sched));
 	spdk_json_write_object_end(w);
 	spdk_json_write_object_end(w);
 
@@ -1238,10 +1238,15 @@ Format: 'user:u1 secret:s1 muser:mu1 msecret:ms1,user:u2 secret:s2 muser:mu2 mse
 @call_cmd
 def set_nvmf_target_config(args):
     rpc.nvmf.set_nvmf_target_config(args.client,
-                                    acceptor_poll_rate=args.acceptor_poll_rate)
+                                    acceptor_poll_rate=args.acceptor_poll_rate,
+                                    conn_sched=args.conn_sched)
 
 p = subparsers.add_parser('set_nvmf_target_config', help='Set NVMf target config')
 p.add_argument('-r', '--acceptor-poll-rate', help='Polling interval of the acceptor for incoming connections (usec)', type=int)
+p.add_argument('-s', '--conn-sched', help="""'roundrobin' - Schedule the incoming connections from any host
+on the cores in a round robin manner (Default). 'hostip' - Schedule all the incoming connections from a
+specific host IP on to the same core. Connections from different IP will be assigned to cores in a round
+robin manner""")
 p.set_defaults(func=set_nvmf_target_config)
 
 @call_cmd
@@ -37,11 +37,14 @@ def set_nvmf_target_options(client,
     return client.call('set_nvmf_target_options', params)
 
 
-def set_nvmf_target_config(client, acceptor_poll_rate=None):
+def set_nvmf_target_config(client,
+                           acceptor_poll_rate=None,
+                           conn_sched=None):
     """Set NVMe-oF target subsystem configuration.
 
     Args:
        acceptor_poll_rate: Acceptor poll period in microseconds (optional)
+       conn_sched: Scheduling of incoming connections (optional)
 
     Returns:
        True or False
@@ -50,6 +53,8 @@ def set_nvmf_target_config(client, acceptor_poll_rate=None):
 
     if acceptor_poll_rate:
         params['acceptor_poll_rate'] = acceptor_poll_rate
+    if conn_sched:
+        params['conn_sched'] = conn_sched
     return client.call('set_nvmf_target_config', params)
 
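As a usage illustration (not part of the patch): with the CLI parser added above, the policy can be selected at startup via scripts/rpc.py set_nvmf_target_config -s hostip. Doing the same from Python might look as follows; only set_nvmf_target_config's signature comes from this patch, while the JSONRPCClient constructor shown is an assumption about SPDK's scripts/rpc helper:

```python
# Hypothetical Python usage; assumes SPDK's scripts/rpc package is on the
# path and that rpc.client.JSONRPCClient accepts a Unix-socket address.
import rpc

client = rpc.client.JSONRPCClient('/var/tmp/spdk.sock')

# Startup-only RPC: pin all connections from a given host IP to one core.
rpc.nvmf.set_nvmf_target_config(client,
                                acceptor_poll_rate=10000,
                                conn_sched='hostip')
```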