nvme/rdma: Create memory domain per Protection Domain

Add a global list of memory domains with a reference counter.
Memory domains are used by NVMe RDMA qpairs.

Also refactor ibv_resize_cq in nvme_rdma_ut.c into a stub.
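
Each qpair takes one reference on the memory domain matching its protection
domain when it is initialized and drops that reference when it is deleted, so
qpairs that share an ibv_pd also share a single spdk_memory_domain. In sketch
form (condensed from the diff below, error logging omitted):

    /* nvme_rdma_qpair_init(): acquire (or create) the domain for this qpair's PD */
    rqpair->memory_domain = nvme_rdma_get_memory_domain(rqpair->rdma_qp->qp->pd);
    if (!rqpair->memory_domain) {
        return -1;
    }

    /* nvme_rdma_ctrlr_delete_io_qpair(): drop the reference; the domain is
     * destroyed only when the last qpair using this PD goes away */
    nvme_rdma_put_memory_domain(rqpair->memory_domain);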

Signed-off-by: Alexey Marchuk <alexeymar@mellanox.com>
Change-Id: Ie58b7e99fcb2c57c967f5dee0417e74845d9e2d1
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/8127
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Paul Luse <paul.e.luse@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Ziye Yang <ziye.yang@intel.com>
Alexey Marchuk 2021-05-19 12:36:18 +03:00 committed by Tomasz Zawadzki
parent 4e52791092
commit d06b6097e3
3 changed files with 177 additions and 5 deletions

lib/nvme/nvme_rdma.c

@@ -3,6 +3,7 @@
*
* Copyright (c) Intel Corporation. All rights reserved.
* Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved.
* Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -38,6 +39,7 @@
#include "spdk/stdinc.h"
#include "spdk/assert.h"
#include "spdk/dma.h"
#include "spdk/log.h"
#include "spdk/trace.h"
#include "spdk/queue.h"
@@ -102,6 +104,13 @@
#define WC_PER_QPAIR(queue_depth) (queue_depth * 2)

struct nvme_rdma_memory_domain {
    TAILQ_ENTRY(nvme_rdma_memory_domain) link;
    uint32_t ref;
    struct ibv_pd *pd;
    struct spdk_memory_domain *domain;
};

enum nvme_rdma_wr_type {
    RDMA_WR_TYPE_RECV,
    RDMA_WR_TYPE_SEND,
@@ -223,6 +232,8 @@ struct nvme_rdma_qpair {
    TAILQ_HEAD(, spdk_nvme_rdma_req) free_reqs;
    TAILQ_HEAD(, spdk_nvme_rdma_req) outstanding_reqs;

    struct nvme_rdma_memory_domain *memory_domain;

    /* Counts of outstanding send and recv objects */
    uint16_t current_num_recvs;
    uint16_t current_num_sends;
@@ -289,6 +300,77 @@ static const char *rdma_cm_event_str[] = {
struct nvme_rdma_qpair *nvme_rdma_poll_group_get_qpair_by_id(struct nvme_rdma_poll_group *group,
        uint32_t qp_num);

static TAILQ_HEAD(, nvme_rdma_memory_domain) g_memory_domains = TAILQ_HEAD_INITIALIZER(
        g_memory_domains);
static pthread_mutex_t g_memory_domains_lock = PTHREAD_MUTEX_INITIALIZER;

static struct nvme_rdma_memory_domain *
nvme_rdma_get_memory_domain(struct ibv_pd *pd)
{
    struct nvme_rdma_memory_domain *domain = NULL;
    struct spdk_memory_domain_ctx dev_ctx;
    int rc;

    pthread_mutex_lock(&g_memory_domains_lock);

    TAILQ_FOREACH(domain, &g_memory_domains, link) {
        if (domain->pd == pd) {
            domain->ref++;
            pthread_mutex_unlock(&g_memory_domains_lock);
            return domain;
        }
    }

    domain = calloc(1, sizeof(*domain));
    if (!domain) {
        SPDK_ERRLOG("Memory allocation failed\n");
        pthread_mutex_unlock(&g_memory_domains_lock);
        return NULL;
    }

    dev_ctx.size = sizeof(dev_ctx);
    dev_ctx.rdma.ibv_pd = pd;

    rc = spdk_memory_domain_create(&domain->domain, SPDK_DMA_DEVICE_TYPE_RDMA, &dev_ctx,
                                   SPDK_RDMA_DMA_DEVICE);
    if (rc) {
        SPDK_ERRLOG("Failed to create memory domain\n");
        free(domain);
        pthread_mutex_unlock(&g_memory_domains_lock);
        return NULL;
    }

    domain->pd = pd;
    domain->ref = 1;
    TAILQ_INSERT_TAIL(&g_memory_domains, domain, link);

    pthread_mutex_unlock(&g_memory_domains_lock);

    return domain;
}
static void
nvme_rdma_put_memory_domain(struct nvme_rdma_memory_domain *device)
{
    if (!device) {
        return;
    }

    pthread_mutex_lock(&g_memory_domains_lock);

    assert(device->ref > 0);

    device->ref--;

    if (device->ref == 0) {
        spdk_memory_domain_destroy(device->domain);
        TAILQ_REMOVE(&g_memory_domains, device, link);
        free(device);
    }

    pthread_mutex_unlock(&g_memory_domains_lock);
}

static inline void *
nvme_rdma_calloc(size_t nmemb, size_t size)
{
@@ -625,6 +707,12 @@ nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
        return -1;
    }

    rqpair->memory_domain = nvme_rdma_get_memory_domain(rqpair->rdma_qp->qp->pd);
    if (!rqpair->memory_domain) {
        SPDK_ERRLOG("Failed to get memory domain\n");
        return -1;
    }

    /* ibv_create_qp will change the values in attr.cap. Make sure we store the proper value. */
    rqpair->max_send_sge = spdk_min(NVME_RDMA_DEFAULT_TX_SGE, attr.cap.max_send_sge);
    rqpair->max_recv_sge = spdk_min(NVME_RDMA_DEFAULT_RX_SGE, attr.cap.max_recv_sge);
@@ -1712,6 +1800,8 @@ nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_
    nvme_rdma_qpair_abort_reqs(qpair, 1);
    nvme_qpair_deinit(qpair);

    nvme_rdma_put_memory_domain(rqpair->memory_domain);

    nvme_rdma_free_reqs(rqpair);
    nvme_rdma_free_rsps(rqpair);
    nvme_rdma_free(rqpair);

mk/spdk.lib_deps.mk

@@ -64,7 +64,7 @@ DEPDIRS-thread := log util trace
DEPDIRS-nvme := log sock util
ifeq ($(CONFIG_RDMA),y)
DEPDIRS-nvme += rdma
DEPDIRS-nvme += rdma dma
endif
ifeq ($(CONFIG_VFIO_USER),y)
DEPDIRS-nvme += vfio_user

test/unit/lib/nvme/nvme_rdma.c/nvme_rdma_ut.c

@@ -1,8 +1,8 @@
/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
* Copyright (c) Intel Corporation. All rights reserved.
* Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -60,12 +60,35 @@ DEFINE_STUB(fcntl, int, (int fd, int cmd, ...), 0);
DEFINE_STUB_V(rdma_destroy_event_channel, (struct rdma_event_channel *channel));
DEFINE_STUB(ibv_dereg_mr, int, (struct ibv_mr *mr), 0);
DEFINE_STUB(ibv_resize_cq, int, (struct ibv_cq *cq, int cqe), 0);
int ibv_resize_cq(struct ibv_cq *cq, int cqe)

DEFINE_RETURN_MOCK(spdk_memory_domain_create, int);
int
spdk_memory_domain_create(struct spdk_memory_domain **domain, enum spdk_dma_device_type type,
        struct spdk_memory_domain_ctx *ctx, const char *id)
{
    static struct spdk_memory_domain *__dma_dev = (struct spdk_memory_domain *)0xdeaddead;

    HANDLE_RETURN_MOCK(spdk_memory_domain_create);

    *domain = __dma_dev;

    return 0;
}

DEFINE_STUB(spdk_memory_domain_get_context, struct spdk_memory_domain_ctx *,
        (struct spdk_memory_domain *device), NULL);
DEFINE_STUB(spdk_memory_domain_get_dma_device_type, enum spdk_dma_device_type,
        (struct spdk_memory_domain *device), SPDK_DMA_DEVICE_TYPE_RDMA);
DEFINE_STUB_V(spdk_memory_domain_destroy, (struct spdk_memory_domain *device));
DEFINE_STUB(spdk_memory_domain_fetch_data, int, (struct spdk_memory_domain *src_domain,
        void *src_domain_ctx, struct iovec *src_iov, uint32_t src_iov_cnt, struct iovec *dst_iov,
        uint32_t dst_iov_cnt, spdk_memory_domain_fetch_data_cpl_cb cpl_cb, void *cpl_cb_arg), 0);
DEFINE_STUB(spdk_memory_domain_translate_data, int, (struct spdk_memory_domain *src_domain,
        void *src_domain_ctx, struct spdk_memory_domain *dst_domain,
        struct spdk_memory_domain_translation_ctx *dst_domain_ctx, void *addr, size_t len,
        struct spdk_memory_domain_translation_result *result), 0);

/* ibv_reg_mr can be a macro, need to undefine it */
#ifdef ibv_reg_mr
#undef ibv_reg_mr
@@ -1055,7 +1078,8 @@ test_nvme_rdma_qpair_init(void)
{
    struct nvme_rdma_qpair rqpair = {};
    struct rdma_cm_id cm_id = {};
    struct ibv_qp qp = {};
    struct ibv_pd *pd = (struct ibv_pd *)0xfeedbeef;
    struct ibv_qp qp = { .pd = pd };
    struct nvme_rdma_ctrlr rctrlr = {};
    int rc = 0;
@@ -1075,6 +1099,7 @@ test_nvme_rdma_qpair_init(void)
    CU_ASSERT(rqpair.current_num_sends == 0);
    CU_ASSERT(rqpair.current_num_recvs == 0);
    CU_ASSERT(rqpair.cq == (struct ibv_cq *)0xFEEDBEEF);
    CU_ASSERT(rqpair.memory_domain != NULL);
}

static void
@@ -1119,6 +1144,62 @@ test_nvme_rdma_qpair_submit_request(void)
    nvme_rdma_free_reqs(&rqpair);
}

static void
test_nvme_rdma_memory_domain(void)
{
    struct nvme_rdma_memory_domain *domain_1 = NULL, *domain_2 = NULL, *domain_tmp;
    struct ibv_pd *pd_1 = (struct ibv_pd *)0x1, *pd_2 = (struct ibv_pd *)0x2;
    /* Counters below are used to check the number of created/destroyed rdma_dma_device objects.
     * Since other unit tests may create dma_devices, we can't just check that the queue is empty or not */
    uint32_t dma_dev_count_start = 0, dma_dev_count = 0, dma_dev_count_end = 0;

    TAILQ_FOREACH(domain_tmp, &g_memory_domains, link) {
        dma_dev_count_start++;
    }

    /* spdk_memory_domain_create failed, expect fail */
    MOCK_SET(spdk_memory_domain_create, -1);
    domain_1 = nvme_rdma_get_memory_domain(pd_1);
    CU_ASSERT(domain_1 == NULL);
    MOCK_CLEAR(spdk_memory_domain_create);

    /* Normal scenario */
    domain_1 = nvme_rdma_get_memory_domain(pd_1);
    SPDK_CU_ASSERT_FATAL(domain_1 != NULL);
    CU_ASSERT(domain_1->domain != NULL);
    CU_ASSERT(domain_1->pd == pd_1);
    CU_ASSERT(domain_1->ref == 1);

    /* Request the same pd, ref counter increased */
    CU_ASSERT(nvme_rdma_get_memory_domain(pd_1) == domain_1);
    CU_ASSERT(domain_1->ref == 2);

    /* Request another pd */
    domain_2 = nvme_rdma_get_memory_domain(pd_2);
    SPDK_CU_ASSERT_FATAL(domain_2 != NULL);
    CU_ASSERT(domain_2->domain != NULL);
    CU_ASSERT(domain_2->pd == pd_2);
    CU_ASSERT(domain_2->ref == 1);

    TAILQ_FOREACH(domain_tmp, &g_memory_domains, link) {
        dma_dev_count++;
    }
    CU_ASSERT(dma_dev_count == dma_dev_count_start + 2);

    /* put domain_1, decrement refcount */
    nvme_rdma_put_memory_domain(domain_1);

    /* Release both devices */
    CU_ASSERT(domain_2->ref == 1);
    nvme_rdma_put_memory_domain(domain_1);
    nvme_rdma_put_memory_domain(domain_2);

    TAILQ_FOREACH(domain_tmp, &g_memory_domains, link) {
        dma_dev_count_end++;
    }
    CU_ASSERT(dma_dev_count_start == dma_dev_count_end);
}

int main(int argc, char **argv)
{
    CU_pSuite suite = NULL;
@@ -1147,6 +1228,7 @@ int main(int argc, char **argv)
    CU_ADD_TEST(suite, test_nvme_rdma_parse_addr);
    CU_ADD_TEST(suite, test_nvme_rdma_qpair_init);
    CU_ADD_TEST(suite, test_nvme_rdma_qpair_submit_request);
    CU_ADD_TEST(suite, test_nvme_rdma_memory_domain);

    CU_basic_set_mode(CU_BRM_VERBOSE);
    CU_basic_run_tests();