numam-spdk/lib/rdma/common.c
Alexey Marchuk 01887ccc30 rdma: Set REMOTE_WRITE permission for iWARP on target side
iWARP requires the REMOTE_WRITE permission for the RDMA_READ
operation.

Fixes #2253

Signed-off-by: Alexey Marchuk <alexeymar@mellanox.com>
Change-Id: Iddfda091903e000d1c4839c1fce67bf25d4a13c2
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/10392
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Dong Yi <dongx.yi@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
2021-11-30 09:08:21 +00:00


/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2020, 2021 Mellanox Technologies LTD. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <rdma/rdma_cma.h>

#include "spdk/log.h"
#include "spdk/env.h"
#include "spdk/string.h"
#include "spdk/likely.h"

#include "spdk_internal/rdma.h"
#include "spdk_internal/assert.h"

struct spdk_rdma_mem_map {
	struct spdk_mem_map *map;
	struct ibv_pd *pd;
	struct spdk_nvme_rdma_hooks *hooks;
	uint32_t ref_count;
	enum spdk_rdma_memory_map_role role;
	LIST_ENTRY(spdk_rdma_mem_map) link;
};

static LIST_HEAD(, spdk_rdma_mem_map) g_rdma_mr_maps = LIST_HEAD_INITIALIZER(&g_rdma_mr_maps);
static pthread_mutex_t g_rdma_mr_maps_mutex = PTHREAD_MUTEX_INITIALIZER;

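/*
 * Memory map notification callback: registers newly added memory with the
 * protection domain (or records the rkey provided by user hooks) and clears
 * the translation when memory is unregistered.
 */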
static int
rdma_mem_notify(void *cb_ctx, struct spdk_mem_map *map,
		enum spdk_mem_map_notify_action action,
		void *vaddr, size_t size)
{
	struct spdk_rdma_mem_map *rmap = cb_ctx;
	struct ibv_pd *pd = rmap->pd;
	struct ibv_mr *mr;
	uint32_t access_flags = 0;
	int rc;

	switch (action) {
	case SPDK_MEM_MAP_NOTIFY_REGISTER:
		if (rmap->hooks && rmap->hooks->get_rkey) {
			rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size,
							  rmap->hooks->get_rkey(pd, vaddr, size));
		} else {
			switch (rmap->role) {
			case SPDK_RDMA_MEMORY_MAP_ROLE_TARGET:
				access_flags = IBV_ACCESS_LOCAL_WRITE;
				if (pd->context->device->transport_type == IBV_TRANSPORT_IWARP) {
					/* IWARP requires REMOTE_WRITE permission for RDMA_READ operation */
					access_flags |= IBV_ACCESS_REMOTE_WRITE;
				}
				break;
			case SPDK_RDMA_MEMORY_MAP_ROLE_INITIATOR:
				access_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE;
				break;
			default:
				SPDK_UNREACHABLE();
			}

			mr = ibv_reg_mr(pd, vaddr, size, access_flags);
			if (mr == NULL) {
				SPDK_ERRLOG("ibv_reg_mr() failed\n");
				return -1;
			} else {
				rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr);
			}
		}
		break;
	case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
		if (rmap->hooks == NULL || rmap->hooks->get_rkey == NULL) {
			mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL);
			if (mr) {
				ibv_dereg_mr(mr);
			}
		}
		rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
		break;
	default:
		SPDK_UNREACHABLE();
	}

	return rc;
}

static int
rdma_check_contiguous_entries(uint64_t addr_1, uint64_t addr_2)
{
	/* Two contiguous mappings will point to the same address which is the start of the RDMA MR. */
	return addr_1 == addr_2;
}

const struct spdk_mem_map_ops g_rdma_map_ops = {
	.notify_cb = rdma_mem_notify,
	.are_contiguous = rdma_check_contiguous_entries
};

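/*
 * Release a map object with the allocator that created it: maps created with
 * user hooks are allocated via spdk_zmalloc(), all others via calloc().
 */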
static void
_rdma_free_mem_map(struct spdk_rdma_mem_map *map)
{
	assert(map);

	if (map->hooks) {
		spdk_free(map);
	} else {
		free(map);
	}
}

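/*
 * Get a memory map for the given protection domain and role. An existing map
 * registered for the same pd/role pair is reused with an incremented
 * reference count; otherwise a new map is allocated and added to the global
 * list under g_rdma_mr_maps_mutex.
 */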
struct spdk_rdma_mem_map *
spdk_rdma_create_mem_map(struct ibv_pd *pd, struct spdk_nvme_rdma_hooks *hooks,
			 enum spdk_rdma_memory_map_role role)
{
	struct spdk_rdma_mem_map *map;

	pthread_mutex_lock(&g_rdma_mr_maps_mutex);

	/* Look up existing mem map registration for this pd */
	LIST_FOREACH(map, &g_rdma_mr_maps, link) {
		if (map->pd == pd && map->role == role) {
			map->ref_count++;
			pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
			return map;
		}
	}

	if (hooks) {
		map = spdk_zmalloc(sizeof(*map), 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
	} else {
		map = calloc(1, sizeof(*map));
	}
	if (!map) {
		pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
		SPDK_ERRLOG("Memory allocation failed\n");
		return NULL;
	}

	map->pd = pd;
	map->ref_count = 1;
	map->hooks = hooks;
	map->role = role;
	map->map = spdk_mem_map_alloc(0, &g_rdma_map_ops, map);
	if (!map->map) {
		SPDK_ERRLOG("Unable to create memory map\n");
		_rdma_free_mem_map(map);
		pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
		return NULL;
	}
	LIST_INSERT_HEAD(&g_rdma_mr_maps, map, link);

	pthread_mutex_unlock(&g_rdma_mr_maps_mutex);

	return map;
}

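/*
 * Drop a reference to a memory map. The map is removed from the global list
 * and its resources are released only when the last reference goes away; the
 * caller's pointer is always cleared.
 */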
void
spdk_rdma_free_mem_map(struct spdk_rdma_mem_map **_map)
{
	struct spdk_rdma_mem_map *map;

	if (!_map) {
		return;
	}
	map = *_map;
	if (!map) {
		return;
	}
	*_map = NULL;

	pthread_mutex_lock(&g_rdma_mr_maps_mutex);
	assert(map->ref_count > 0);
	map->ref_count--;
	if (map->ref_count != 0) {
		pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
		return;
	}

	LIST_REMOVE(map, link);
	pthread_mutex_unlock(&g_rdma_mr_maps_mutex);

	if (map->map) {
		spdk_mem_map_free(&map->map);
	}
	_rdma_free_mem_map(map);
}

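/*
 * Translate a virtual address range into either an rkey (when user hooks
 * provide get_rkey) or an ibv_mr registered by the notify callback. Returns
 * -EINVAL if no MR translation exists for the address.
 */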
int
spdk_rdma_get_translation(struct spdk_rdma_mem_map *map, void *address,
			  size_t length, struct spdk_rdma_memory_translation *translation)
{
	uint64_t real_length = length;

	assert(map);
	assert(address);
	assert(translation);

	if (map->hooks && map->hooks->get_rkey) {
		translation->translation_type = SPDK_RDMA_TRANSLATION_KEY;
		translation->mr_or_key.key = spdk_mem_map_translate(map->map, (uint64_t)address, &real_length);
	} else {
		translation->translation_type = SPDK_RDMA_TRANSLATION_MR;
		translation->mr_or_key.mr = (struct ibv_mr *)spdk_mem_map_translate(map->map, (uint64_t)address,
					    &real_length);
		if (spdk_unlikely(!translation->mr_or_key.mr)) {
			SPDK_ERRLOG("No translation for ptr %p, size %zu\n", address, length);
			return -EINVAL;
		}
	}

	assert(real_length >= length);

	return 0;
}

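/*
 * Create a shared receive queue wrapper. Statistics are either shared with
 * the caller (init_attr->stats) or allocated privately and freed on destroy.
 */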
struct spdk_rdma_srq *
spdk_rdma_srq_create(struct spdk_rdma_srq_init_attr *init_attr)
{
	assert(init_attr);
	assert(init_attr->pd);

	struct spdk_rdma_srq *rdma_srq = calloc(1, sizeof(*rdma_srq));

	if (!rdma_srq) {
		SPDK_ERRLOG("Can't allocate memory for SRQ handle\n");
		return NULL;
	}

	if (init_attr->stats) {
		rdma_srq->stats = init_attr->stats;
		rdma_srq->shared_stats = true;
	} else {
		rdma_srq->stats = calloc(1, sizeof(*rdma_srq->stats));
		if (!rdma_srq->stats) {
			SPDK_ERRLOG("SRQ statistics memory allocation failed");
			free(rdma_srq);
			return NULL;
		}
	}

	rdma_srq->srq = ibv_create_srq(init_attr->pd, &init_attr->srq_init_attr);
	if (!rdma_srq->srq) {
		if (!init_attr->stats) {
			free(rdma_srq->stats);
		}
		SPDK_ERRLOG("Unable to create SRQ, errno %d (%s)\n", errno, spdk_strerror(errno));
		free(rdma_srq);
		return NULL;
	}

	return rdma_srq;
}

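/*
 * Destroy the SRQ and free privately owned statistics. Destroying while
 * receive WRs are still queued but not flushed only produces a warning.
 */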
int
spdk_rdma_srq_destroy(struct spdk_rdma_srq *rdma_srq)
{
	int rc;

	if (!rdma_srq) {
		return 0;
	}

	assert(rdma_srq->srq);

	if (rdma_srq->recv_wrs.first != NULL) {
		SPDK_WARNLOG("Destroying RDMA SRQ with queued recv WRs\n");
	}

	rc = ibv_destroy_srq(rdma_srq->srq);
	if (rc) {
		SPDK_ERRLOG("SRQ destroy failed with %d\n", rc);
	}

	if (!rdma_srq->shared_stats) {
		free(rdma_srq->stats);
	}

	free(rdma_srq);

	return rc;
}

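/*
 * Append a chain of receive WRs to the pending list and count them in the
 * statistics. Returns true when the list was previously empty, i.e. the
 * caller now has WRs that will need a flush.
 */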
static inline bool
rdma_queue_recv_wrs(struct spdk_rdma_recv_wr_list *recv_wrs, struct ibv_recv_wr *first,
		    struct spdk_rdma_wr_stats *recv_stats)
{
	struct ibv_recv_wr *last;

	recv_stats->num_submitted_wrs++;
	last = first;
	while (last->next != NULL) {
		last = last->next;
		recv_stats->num_submitted_wrs++;
	}

	if (recv_wrs->first == NULL) {
		recv_wrs->first = first;
		recv_wrs->last = last;
		return true;
	} else {
		recv_wrs->last->next = first;
		recv_wrs->last = last;
		return false;
	}
}

bool
spdk_rdma_srq_queue_recv_wrs(struct spdk_rdma_srq *rdma_srq, struct ibv_recv_wr *first)
{
	assert(rdma_srq);
	assert(first);

	return rdma_queue_recv_wrs(&rdma_srq->recv_wrs, first, rdma_srq->stats);
}

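/*
 * Post all queued receive WRs to the SRQ in a single ibv_post_srq_recv()
 * call and reset the pending list; each flush counts as one doorbell update.
 */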
int
spdk_rdma_srq_flush_recv_wrs(struct spdk_rdma_srq *rdma_srq, struct ibv_recv_wr **bad_wr)
{
	int rc;

	if (spdk_unlikely(rdma_srq->recv_wrs.first == NULL)) {
		return 0;
	}

	rc = ibv_post_srq_recv(rdma_srq->srq, rdma_srq->recv_wrs.first, bad_wr);

	rdma_srq->recv_wrs.first = NULL;
	rdma_srq->stats->doorbell_updates++;

	return rc;
}

bool
spdk_rdma_qp_queue_recv_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_recv_wr *first)
{
	assert(spdk_rdma_qp);
	assert(first);

	return rdma_queue_recv_wrs(&spdk_rdma_qp->recv_wrs, first, &spdk_rdma_qp->stats->recv);
}

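/*
 * Same flush pattern for a regular queue pair: submit the queued receive WRs
 * via ibv_post_recv() and clear the pending list.
 */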
int
spdk_rdma_qp_flush_recv_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_recv_wr **bad_wr)
{
	int rc;

	if (spdk_unlikely(spdk_rdma_qp->recv_wrs.first == NULL)) {
		return 0;
	}

	rc = ibv_post_recv(spdk_rdma_qp->qp, spdk_rdma_qp->recv_wrs.first, bad_wr);

	spdk_rdma_qp->recv_wrs.first = NULL;
	spdk_rdma_qp->stats->recv.doorbell_updates++;

	return rc;
}