nvme: Add mechanism to override RDMA pd/mr behavior

Add a mechanism to modify the RDMA transport's behavior
when creating protection domains and registering memory.
This is entirely optional.
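
For illustration, a minimal sketch of how an application might wire this
up, assuming it already owns a protection domain and a single registered
memory region covering all of its I/O buffers. The app_* names and the
globals below are hypothetical, not part of this patch:

    #include "spdk/stdinc.h"
    #include "spdk/nvme.h"

    #include <infiniband/verbs.h>

    /* Hypothetical application state: a pd and an mr the application
     * created and registered on its own. */
    static struct ibv_pd *g_app_pd;
    static struct ibv_mr *g_app_mr;

    static void *
    app_get_ctx(const struct spdk_nvme_transport_id *trid)
    {
        /* Whatever is returned here is passed back to the other hooks
         * as ctx. The transport only installs the hooks on a controller
         * when get_ctx is provided. */
        return NULL;
    }

    static struct ibv_pd *
    app_get_ibv_pd(void *ctx, struct ibv_context *verbs)
    {
        /* Hand the transport the pd the application already owns. */
        return g_app_pd;
    }

    static uint64_t
    app_get_rkey(void *ctx, void *buf, size_t size)
    {
        /* All I/O buffers live inside the single registered region. */
        return g_app_mr->rkey;
    }

    static struct spdk_nvme_rdma_hooks g_app_hooks = {
        .get_ctx     = app_get_ctx,
        .get_ibv_pd  = app_get_ibv_pd,
        .get_rkey    = app_get_rkey,
    };

    static void
    app_install_nvme_rdma_hooks(void)
    {
        /* Must run before spdk_nvme_probe() so the hooks are seen when
         * controllers are constructed. */
        spdk_nvme_rdma_init_hooks(&g_app_hooks);
    }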

Change-Id: I7cd850e76a673bf5521ca4815b779c53ab9567e8
Signed-off-by: zkhatami88 <z.khatami88@gmail.com>
Reviewed-on: https://review.gerrithub.io/421415
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Authored by zkhatami88 on 2018-08-06 16:27:13 -07:00; committed by Ben Walker
parent 161af0b5cb
commit 9fb6947617
2 changed files with 140 additions and 28 deletions


@@ -44,6 +44,7 @@
extern "C" {
#endif
#include "spdk/config.h"
#include "spdk/env.h"
#include "spdk/nvme_spec.h"
#include "spdk/nvmf_spec.h"
@@ -2038,6 +2039,60 @@ void spdk_nvme_qpair_remove_cmd_error_injection(struct spdk_nvme_ctrlr *ctrlr,
struct spdk_nvme_qpair *qpair,
uint8_t opc);
#ifdef SPDK_CONFIG_RDMA
struct ibv_context;
struct ibv_pd;
struct ibv_mr;
/**
* RDMA Transport Hooks
*/
struct spdk_nvme_rdma_hooks {
/**
* \brief Get a transport ID-specific context to be passed to
* the other hooks.
*
* \param trid The transport ID.
*
* \return A context pointer to be passed to the other hooks.
*/
void *(*get_ctx)(const struct spdk_nvme_transport_id *trid);
/**
* \brief Get an InfiniBand Verbs protection domain.
*
* \param ctx Context returned from get_ctx.
* \param verbs InfiniBand Verbs context.
*
* \return The protection domain to use for this NVMe controller.
*/
struct ibv_pd *(*get_ibv_pd)(void *ctx, struct ibv_context *verbs);
/**
* \brief Get an InfiniBand Verbs memory region for a buffer.
*
* \param ctx Context returned from get_hook_ctx.
* \param buf Memory buffer for which an rkey should be returned.
* \param size size of buf
*
* \return Infiniband remote key (rkey) for this buf
*/
uint64_t (*get_rkey)(void *ctx, void *buf, size_t size);
};
/**
* \brief Set the global hooks for the RDMA transport, if necessary.
*
* This call is optional and must be performed prior to probing for
* any devices. By default, the RDMA transport will use the ibverbs
* library to create protection domains and register memory. This
* mechanism overrides that behavior and uses an existing protection
* domain and memory registration instead.
*
* \param hooks The hooks to install for the RDMA transport.
*/
void spdk_nvme_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks);
#endif
#ifdef __cplusplus
}

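The get_ibv_pd hook is invoked when a queue pair is created, with the
ibv_context of the device the connection resolved to, and the pd it
returns is handed to rdma_create_qp() (see the qpair init changes
below). A hedged sketch of one possible implementation that keys an
application-owned pd by device; the cache and the app_* names are
hypothetical, not part of this patch:

    #include <stddef.h>
    #include <infiniband/verbs.h>

    /* Hypothetical cache of one application-owned pd per RDMA device. */
    #define APP_MAX_DEVICES 8

    struct app_pd_entry {
        struct ibv_context  *verbs;
        struct ibv_pd       *pd;
    };

    static struct app_pd_entry g_app_pds[APP_MAX_DEVICES];

    static struct ibv_pd *
    app_get_ibv_pd(void *ctx, struct ibv_context *verbs)
    {
        int i;

        for (i = 0; i < APP_MAX_DEVICES; i++) {
            if (g_app_pds[i].verbs == verbs) {
                /* Reuse the pd already shared with the rest of the
                 * application for this device. */
                return g_app_pds[i].pd;
            }
            if (g_app_pds[i].verbs == NULL) {
                /* First time this device is seen: allocate a pd and
                 * remember it. */
                g_app_pds[i].pd = ibv_alloc_pd(verbs);
                if (g_app_pds[i].pd == NULL) {
                    return NULL;
                }
                g_app_pds[i].verbs = verbs;
                return g_app_pds[i].pd;
            }
        }

        return NULL;
    }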

@@ -71,6 +71,8 @@ struct spdk_nvmf_cmd {
struct spdk_nvme_sgl_descriptor sgl[NVME_RDMA_MAX_SGL_DESCRIPTORS];
};
struct spdk_nvme_rdma_hooks g_nvme_hooks = {};
/* Mapping from virtual address to ibv_mr pointer for a protection domain */
struct spdk_nvme_rdma_mr_map {
struct ibv_pd *pd;
@@ -82,6 +84,10 @@ struct spdk_nvme_rdma_mr_map {
/* NVMe RDMA transport extensions for spdk_nvme_ctrlr */
struct nvme_rdma_ctrlr {
struct spdk_nvme_ctrlr ctrlr;
struct spdk_nvme_rdma_hooks hooks;
void *hook_ctx;
struct ibv_pd *pd;
};
/* NVMe RDMA qpair extensions for spdk_nvme_qpair */
@@ -241,6 +247,7 @@ nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
{
int rc;
struct ibv_qp_init_attr attr;
struct nvme_rdma_ctrlr *rctrlr;
rqpair->cq = ibv_create_cq(rqpair->cm_id->verbs, rqpair->num_entries * 2, rqpair, NULL, 0);
if (!rqpair->cq) {
@@ -248,6 +255,13 @@ nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
return -1;
}
rctrlr = nvme_rdma_ctrlr(rqpair->qpair.ctrlr);
if (rctrlr->hooks.get_ibv_pd) {
rctrlr->pd = rctrlr->hooks.get_ibv_pd(rctrlr->hook_ctx, rqpair->cm_id->verbs);
} else {
rctrlr->pd = NULL;
}
memset(&attr, 0, sizeof(struct ibv_qp_init_attr));
attr.qp_type = IBV_QPT_RC;
attr.send_cq = rqpair->cq;
@@ -257,11 +271,12 @@ nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
attr.cap.max_send_sge = NVME_RDMA_DEFAULT_TX_SGE;
attr.cap.max_recv_sge = NVME_RDMA_DEFAULT_RX_SGE;
rc = rdma_create_qp(rqpair->cm_id, NULL, &attr);
rc = rdma_create_qp(rqpair->cm_id, rctrlr->pd, &attr);
if (rc) {
SPDK_ERRLOG("rdma_create_qp failed\n");
return -1;
}
rctrlr->pd = rqpair->cm_id->qp->pd;
rqpair->cm_id->context = &rqpair->qpair;
@@ -611,29 +626,38 @@ nvme_rdma_mr_map_notify(void *cb_ctx, struct spdk_mem_map *map,
enum spdk_mem_map_notify_action action,
void *vaddr, size_t size)
{
struct ibv_pd *pd = cb_ctx;
struct nvme_rdma_ctrlr *rctrlr = cb_ctx;
struct ibv_pd *pd;
struct ibv_mr *mr;
int rc;
switch (action) {
case SPDK_MEM_MAP_NOTIFY_REGISTER:
mr = ibv_reg_mr(pd, vaddr, size,
IBV_ACCESS_LOCAL_WRITE |
IBV_ACCESS_REMOTE_READ |
IBV_ACCESS_REMOTE_WRITE);
if (mr == NULL) {
SPDK_ERRLOG("ibv_reg_mr() failed\n");
return -EFAULT;
if (!rctrlr->hooks.get_rkey) {
pd = rctrlr->pd;
mr = ibv_reg_mr(pd, vaddr, size,
IBV_ACCESS_LOCAL_WRITE |
IBV_ACCESS_REMOTE_READ |
IBV_ACCESS_REMOTE_WRITE);
if (mr == NULL) {
SPDK_ERRLOG("ibv_reg_mr() failed\n");
return -EFAULT;
} else {
rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr);
}
} else {
rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr);
rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size,
rctrlr->hooks.get_rkey(rctrlr->hook_ctx, vaddr, size));
}
break;
case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL);
rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
if (mr) {
ibv_dereg_mr(mr);
if (!rctrlr->hooks.get_rkey) {
mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL);
if (mr) {
ibv_dereg_mr(mr);
}
}
rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
break;
default:
SPDK_UNREACHABLE();
@@ -673,7 +697,8 @@ nvme_rdma_register_mem(struct nvme_rdma_qpair *rqpair)
mr_map->ref = 1;
mr_map->pd = pd;
mr_map->map = spdk_mem_map_alloc((uint64_t)NULL, &nvme_rdma_map_ops, pd);
mr_map->map = spdk_mem_map_alloc((uint64_t)NULL, &nvme_rdma_map_ops,
nvme_rdma_ctrlr(rqpair->qpair.ctrlr));
if (mr_map->map == NULL) {
SPDK_ERRLOG("spdk_mem_map_alloc() failed\n");
free(mr_map);
@@ -918,9 +943,21 @@ nvme_rdma_build_contig_request(struct nvme_rdma_qpair *rqpair,
assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
requested_size = req->payload_size;
mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)payload,
&requested_size);
if (mr == NULL || requested_size < req->payload_size) {
if (!nvme_rdma_ctrlr(rqpair->qpair.ctrlr)->hooks.get_rkey) {
mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)payload,
&requested_size);
if (mr == NULL) {
return -1;
}
req->cmd.dptr.sgl1.keyed.key = mr->rkey;
} else {
req->cmd.dptr.sgl1.keyed.key = spdk_mem_map_translate(rqpair->mr_map->map,
(uint64_t)payload,
&requested_size);
}
if (requested_size < req->payload_size) {
return -1;
}
@@ -937,7 +974,6 @@ nvme_rdma_build_contig_request(struct nvme_rdma_qpair *rqpair,
req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
req->cmd.dptr.sgl1.keyed.length = req->payload_size;
req->cmd.dptr.sgl1.keyed.key = mr->rkey;
req->cmd.dptr.sgl1.address = (uint64_t)payload;
return 0;
@@ -977,17 +1013,27 @@ nvme_rdma_build_sgl_request(struct nvme_rdma_qpair *rqpair,
sge_length = spdk_min(remaining_size, sge_length);
mr_length = sge_length;
mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)virt_addr,
&mr_length);
if (!nvme_rdma_ctrlr(rqpair->qpair.ctrlr)->hooks.get_rkey) {
mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map,
(uint64_t)virt_addr,
&mr_length);
if (mr == NULL) {
return -1;
}
cmd->sgl[num_sgl_desc].keyed.key = mr->rkey;
} else {
cmd->sgl[num_sgl_desc].keyed.key = spdk_mem_map_translate(rqpair->mr_map->map,
(uint64_t)virt_addr,
&mr_length);
}
if (mr == NULL || mr_length < sge_length) {
if (mr_length < sge_length) {
return -1;
}
cmd->sgl[num_sgl_desc].keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
cmd->sgl[num_sgl_desc].keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
cmd->sgl[num_sgl_desc].keyed.length = sge_length;
cmd->sgl[num_sgl_desc].keyed.key = mr->rkey;
cmd->sgl[num_sgl_desc].address = (uint64_t)virt_addr;
remaining_size -= sge_length;
@@ -1017,11 +1063,11 @@ nvme_rdma_build_sgl_request(struct nvme_rdma_qpair *rqpair,
* the NVMe command. */
rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);
req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
req->cmd.dptr.sgl1.keyed.length = req->payload_size;
req->cmd.dptr.sgl1.keyed.key = mr->rkey;
req->cmd.dptr.sgl1.address = rqpair->cmds[rdma_req->id].sgl[0].address;
req->cmd.dptr.sgl1.keyed.type = cmd->sgl[0].keyed.type;
req->cmd.dptr.sgl1.keyed.subtype = cmd->sgl[0].keyed.subtype;
req->cmd.dptr.sgl1.keyed.length = cmd->sgl[0].keyed.length;
req->cmd.dptr.sgl1.keyed.key = cmd->sgl[0].keyed.key;
req->cmd.dptr.sgl1.address = cmd->sgl[0].address;
} else {
/*
* Otherwise, The SGL descriptor embedded in the command must point to the list of
@@ -1363,6 +1409,11 @@ struct spdk_nvme_ctrlr *nvme_rdma_ctrlr_construct(const struct spdk_nvme_transpo
nvme_ctrlr_init_cap(&rctrlr->ctrlr, &cap, &vs);
if (g_nvme_hooks.get_ctx) {
rctrlr->hooks = g_nvme_hooks;
rctrlr->hook_ctx = rctrlr->hooks.get_ctx(&rctrlr->ctrlr.trid);
}
SPDK_DEBUGLOG(SPDK_LOG_NVME, "successfully initialized the nvmf ctrlr\n");
return &rctrlr->ctrlr;
}
@@ -1632,3 +1683,9 @@ nvme_rdma_ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, void *buf, siz
{
return 0;
}
void
spdk_nvme_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks)
{
g_nvme_hooks = *hooks;
}
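
For completeness, a hedged sketch of what a get_rkey implementation
might look like for an application that registered several memory
regions up front. The transport calls this hook from its memory map
notify callback (the SPDK_MEM_MAP_NOTIFY_REGISTER path above) and
stores the returned key in the map in place of an ibv_mr pointer, so
the key must stay valid for as long as the memory remains registered.
The region table and the app_* names are hypothetical:

    #include <stdint.h>
    #include <stddef.h>

    /* Hypothetical table of regions the application registered itself. */
    struct app_reg_region {
        void        *base;
        size_t      len;
        uint32_t    rkey;
    };

    static struct app_reg_region g_app_regions[16];
    static int g_app_num_regions;

    /* Find the registered region covering [buf, buf + size) and return
     * its remote key. */
    static uint64_t
    app_get_rkey(void *ctx, void *buf, size_t size)
    {
        uintptr_t start = (uintptr_t)buf;
        uintptr_t end = start + size;
        int i;

        for (i = 0; i < g_app_num_regions; i++) {
            uintptr_t rstart = (uintptr_t)g_app_regions[i].base;
            uintptr_t rend = rstart + g_app_regions[i].len;

            if (start >= rstart && end <= rend) {
                return g_app_regions[i].rkey;
            }
        }

        /* No covering registration. A real implementation would make
         * sure every DMA-able region is covered before I/O starts;
         * returning 0 here is only a placeholder. */
        return 0;
    }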