sock: introduce SO_INCOMING_CPU to get placement_id

Leverage SO_INCOMING_CPU to get the CPU affinity of connections
(sockets), and allocate the connections to specific poll groups
in order to take advantage of cache locality.

From our test:
With 6 P4600 NVMe drives on the target, the target using 8 cores with the
NIC IRQs bound to these 8 cores, and the initiator side using 24 and 32
cores, we get an 11%~17% randwrite performance boost for posix, and 8%~12%
for uring.

Change-Id: I011e0a21502c85adcccd4a14fbe9838b43f54976
Signed-off-by: Richael Zhuang <richael.zhuang@arm.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/5748
Community-CI: Broadcom CI
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Ziye Yang <ziye.yang@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
This commit is contained in:
Richael Zhuang 2020-12-15 15:05:44 +08:00 committed by Tomasz Zawadzki
parent 9713bfe90d
commit 201aa63471
10 changed files with 92 additions and 32 deletions

View File

@ -41,6 +41,13 @@ independent SPDK processes are running on one node. The filter function can
then be implemented in these processes to decide which SSDs to probe based on
the new SSD's PCI address.
### sock
The type of enable_placement_id in struct spdk_sock_impl_opts is changed from
bool to an integer (uint32_t). The value of enable_placement_id can be configured
via RPC. Setting enable_placement_id=2 uses SO_INCOMING_CPU to obtain the
placement_id, which aims to utilize CPU cache locality.
## v21.01:
### idxd

View File

@ -7731,7 +7731,7 @@ Example response:
"result": {
"recv_buf_size": 2097152,
"send_buf_size": 2097152,
"enable_recv_pipe": true
"enable_recv_pipe": true,
"enable_zerocopy_send": true
}
}
@ -7751,7 +7751,7 @@ send_buf_size | Optional | number | Size of socket send buffer in
enable_recv_pipe | Optional | boolean | Enable or disable receive pipe
enable_zerocopy_send | Optional | boolean | Enable or disable zero copy on send
enable_quick_ack | Optional | boolean | Enable or disable quick ACK
enable_placement_id | Optional | boolean | Enable or disable placement_id
enable_placement_id | Optional | number | Enable or disable placement_id. 0: disable, 1: incoming_napi, 2: incoming_cpu
### Response
@ -7773,7 +7773,7 @@ Example request:
"enable_recv_pipe": false,
"enable_zerocopy_send": true,
"enable_quick_ack": false,
"enable_placement_id": false
"enable_placement_id": 0
}
}
~~~

View File

@ -116,7 +116,7 @@ struct spdk_sock_impl_opts {
/**
* Enable or disable placement_id. Used by posix and uring socket modules.
*/
bool enable_placement_id;
uint32_t enable_placement_id;
};

View File

@ -36,6 +36,7 @@
#include "spdk/sock.h"
#include "spdk_internal/sock.h"
#include "spdk/log.h"
#include "spdk/env.h"
#define SPDK_SOCK_DEFAULT_PRIORITY 0
#define SPDK_SOCK_DEFAULT_ZCOPY true
@ -59,7 +60,7 @@ static pthread_mutex_t g_map_table_mutex = PTHREAD_MUTEX_INITIALIZER;
* If the group is already in the map, take a reference.
*/
static int
sock_map_insert(int placement_id, struct spdk_sock_group *group)
sock_map_insert(int placement_id, struct spdk_sock_group *group, bool init)
{
struct spdk_sock_placement_id_entry *entry;
@ -84,7 +85,9 @@ sock_map_insert(int placement_id, struct spdk_sock_group *group)
entry->placement_id = placement_id;
entry->group = group;
if (!init) {
entry->ref++;
}
STAILQ_INSERT_TAIL(&g_placement_id_map, entry, link);
pthread_mutex_unlock(&g_map_table_mutex);
@ -154,11 +157,11 @@ static int
sock_get_placement_id(struct spdk_sock *sock)
{
int rc;
int placement_id;
int placement_id = -1;
if (!sock->placement_id) {
if (sock->placement_id == -1) {
rc = sock->net_impl->get_placement_id(sock, &placement_id);
if (!rc && (placement_id != 0)) {
if (!rc && (placement_id != -1)) {
sock->placement_id = placement_id;
}
}
@ -169,10 +172,10 @@ sock_get_placement_id(struct spdk_sock *sock)
int
spdk_sock_get_optimal_sock_group(struct spdk_sock *sock, struct spdk_sock_group **group)
{
int placement_id;
int placement_id = -1;
placement_id = sock_get_placement_id(sock);
if (placement_id != 0) {
if (placement_id != -1) {
sock_map_lookup(placement_id, group);
return 0;
} else {
@ -336,6 +339,7 @@ spdk_sock_accept(struct spdk_sock *sock)
new_sock->opts = sock->opts;
memcpy(&new_sock->opts, &sock->opts, sizeof(new_sock->opts));
new_sock->net_impl = sock->net_impl;
new_sock->placement_id = -1;
TAILQ_INIT(&new_sock->queued_reqs);
TAILQ_INIT(&new_sock->pending_reqs);
}
@ -480,6 +484,9 @@ spdk_sock_group_create(void *ctx)
struct spdk_net_impl *impl = NULL;
struct spdk_sock_group *group;
struct spdk_sock_group_impl *group_impl;
struct spdk_sock_impl_opts sock_opts = {};
size_t sock_len;
bool enable_incoming_cpu = 0;
group = calloc(1, sizeof(*group));
if (group == NULL) {
@ -494,10 +501,22 @@ spdk_sock_group_create(void *ctx)
STAILQ_INSERT_TAIL(&group->group_impls, group_impl, link);
TAILQ_INIT(&group_impl->socks);
group_impl->net_impl = impl;
sock_len = sizeof(sock_opts);
spdk_sock_impl_get_opts(impl->name, &sock_opts, &sock_len);
if (sock_opts.enable_placement_id == 2) {
enable_incoming_cpu = 1;
}
}
}
group->ctx = ctx;
/* if any net_impl is configured to use SO_INCOMING_CPU, initialize the sock map */
if (enable_incoming_cpu) {
sock_map_insert(spdk_env_get_current_core(), group, 1);
}
return group;
}
@ -534,7 +553,7 @@ spdk_sock_group_add_sock(struct spdk_sock_group *group, struct spdk_sock *sock,
placement_id = sock_get_placement_id(sock);
if (placement_id != 0) {
rc = sock_map_insert(placement_id, group);
rc = sock_map_insert(placement_id, group, 0);
if (rc < 0) {
return -1;
}
@ -794,7 +813,7 @@ spdk_sock_write_config_json(struct spdk_json_write_ctx *w)
spdk_json_write_named_bool(w, "enable_recv_pipe", opts.enable_recv_pipe);
spdk_json_write_named_bool(w, "enable_zerocopy_send", opts.enable_zerocopy_send);
spdk_json_write_named_bool(w, "enable_quickack", opts.enable_quickack);
spdk_json_write_named_bool(w, "enable_placement_id", opts.enable_placement_id);
spdk_json_write_named_uint32(w, "enable_placement_id", opts.enable_placement_id);
spdk_json_write_object_end(w);
spdk_json_write_object_end(w);
} else {

View File

@ -76,7 +76,7 @@ rpc_sock_impl_get_options(struct spdk_jsonrpc_request *request,
spdk_json_write_named_bool(w, "enable_recv_pipe", sock_opts.enable_recv_pipe);
spdk_json_write_named_bool(w, "enable_zerocopy_send", sock_opts.enable_zerocopy_send);
spdk_json_write_named_bool(w, "enable_quickack", sock_opts.enable_quickack);
spdk_json_write_named_bool(w, "enable_placement_id", sock_opts.enable_placement_id);
spdk_json_write_named_uint32(w, "enable_placement_id", sock_opts.enable_placement_id);
spdk_json_write_object_end(w);
spdk_jsonrpc_end_result(request, w);
free(impl_name);
@ -116,7 +116,7 @@ static const struct spdk_json_object_decoder rpc_sock_impl_set_opts_decoders[] =
},
{
"enable_placement_id", offsetof(struct spdk_rpc_sock_impl_set_opts, sock_opts.enable_placement_id),
spdk_json_decode_bool, true
spdk_json_decode_uint32, true
},
};

View File

@ -86,7 +86,7 @@ static struct spdk_sock_impl_opts g_spdk_posix_sock_impl_opts = {
.enable_recv_pipe = true,
.enable_zerocopy_send = true,
.enable_quickack = false,
.enable_placement_id = false,
.enable_placement_id = 0,
};
static int
@ -1092,16 +1092,34 @@ posix_sock_get_placement_id(struct spdk_sock *_sock, int *placement_id)
return rc;
}
if (g_spdk_posix_sock_impl_opts.enable_placement_id != 0) {
switch (g_spdk_posix_sock_impl_opts.enable_placement_id) {
case 1: {
#if defined(SO_INCOMING_NAPI_ID)
struct spdk_posix_sock *sock = __posix_sock(_sock);
socklen_t salen = sizeof(int);
socklen_t len = sizeof(int);
rc = getsockopt(sock->fd, SOL_SOCKET, SO_INCOMING_NAPI_ID, placement_id, &len);
#endif
break;
}
case 2: {
#if defined(SO_INCOMING_CPU)
struct spdk_posix_sock *sock = __posix_sock(_sock);
socklen_t len = sizeof(int);
rc = getsockopt(sock->fd, SOL_SOCKET, SO_INCOMING_CPU, placement_id, &len);
#endif
break;
}
default:
break;
}
}
rc = getsockopt(sock->fd, SOL_SOCKET, SO_INCOMING_NAPI_ID, placement_id, &salen);
if (rc != 0) {
SPDK_ERRLOG("getsockopt() failed (errno=%d)\n", errno);
}
#endif
return rc;
}

View File

@ -102,7 +102,7 @@ static struct spdk_sock_impl_opts g_spdk_uring_sock_impl_opts = {
.send_buf_size = MIN_SO_SNDBUF_SIZE,
.enable_recv_pipe = true,
.enable_quickack = false,
.enable_placement_id = false,
.enable_placement_id = 0,
};
#define SPDK_URING_SOCK_REQUEST_IOV(req) ((struct iovec *)((uint8_t *)req + sizeof(struct spdk_sock_request)))
@ -1098,16 +1098,34 @@ uring_sock_get_placement_id(struct spdk_sock *_sock, int *placement_id)
return rc;
}
if (g_spdk_uring_sock_impl_opts.enable_placement_id != 0) {
switch (g_spdk_uring_sock_impl_opts.enable_placement_id) {
case 1: {
#if defined(SO_INCOMING_NAPI_ID)
struct spdk_uring_sock *sock = __uring_sock(_sock);
socklen_t salen = sizeof(int);
socklen_t len = sizeof(int);
rc = getsockopt(sock->fd, SOL_SOCKET, SO_INCOMING_NAPI_ID, placement_id, &len);
#endif
break;
}
case 2: {
#if defined(SO_INCOMING_CPU)
struct spdk_uring_sock *sock = __uring_sock(_sock);
socklen_t len = sizeof(int);
rc = getsockopt(sock->fd, SOL_SOCKET, SO_INCOMING_CPU, placement_id, &len);
#endif
break;
}
default:
break;
}
}
rc = getsockopt(sock->fd, SOL_SOCKET, SO_INCOMING_NAPI_ID, placement_id, &salen);
if (rc != 0) {
SPDK_ERRLOG("getsockopt() failed (errno=%d)\n", errno);
}
#endif
return rc;
}

View File

@ -2587,6 +2587,7 @@ Format: 'user:u1 secret:s1 muser:mu1 msecret:ms1,user:u2 secret:s2 muser:mu2 mse
p.add_argument('-i', '--impl', help='Socket implementation name, e.g. posix', required=True)
p.add_argument('-r', '--recv-buf-size', help='Size of receive buffer on socket in bytes', type=int)
p.add_argument('-s', '--send-buf-size', help='Size of send buffer on socket in bytes', type=int)
p.add_argument('-p', '--enable-placement-id', help='Option for placement-id. 0:disable,1:incoming_napi,2:incoming_cpu', type=int)
p.add_argument('--enable-recv-pipe', help='Enable receive pipe',
action='store_true', dest='enable_recv_pipe')
p.add_argument('--disable-recv-pipe', help='Disable receive pipe',
@ -2599,10 +2600,6 @@ Format: 'user:u1 secret:s1 muser:mu1 msecret:ms1,user:u2 secret:s2 muser:mu2 mse
action='store_true', dest='enable_quickack')
p.add_argument('--disable-quickack', help='Disable quick ACK',
action='store_false', dest='enable_quickack')
p.add_argument('--enable-placement_id', help='Enable placement_id',
action='store_true', dest='enable_placement_id')
p.add_argument('--disable-placement_id', help='Disable placement_id',
action='store_false', dest='enable_placement_id')
p.set_defaults(func=sock_impl_set_options, enable_recv_pipe=None, enable_zerocopy_send=None,
enable_quickack=None, enable_placement_id=None)

View File

@ -28,7 +28,7 @@ def sock_impl_set_options(client,
enable_recv_pipe: enable or disable receive pipe (optional)
enable_zerocopy_send: enable or disable zerocopy on send (optional)
enable_quickack: enable or disable quickack (optional)
enable_placement_id: enable or disable placement_id (optional)
enable_placement_id: option for placement_id. 0:disable,1:incoming_napi,2:incoming_cpu (optional)
"""
params = {}

View File

@ -42,6 +42,7 @@
#include "sock/posix/posix.c"
#include "spdk_internal/mock.h"
#include "common/lib/test_env.c"
#define UT_IP "test_ip"
#define UT_PORT 1234