nvme: Add mechanism to override RDMA pd/mr behavior
Add a mechanism to modify the RDMA transport's behavior when creating
protection domains and registering memory. This is entirely optional.

Change-Id: I7cd850e76a673bf5521ca4815b779c53ab9567e8
Signed-off-by: zkhatami88 <z.khatami88@gmail.com>
Reviewed-on: https://review.gerrithub.io/421415
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
parent 161af0b5cb
commit 9fb6947617
include/spdk/nvme.h

@@ -44,6 +44,7 @@
 extern "C" {
 #endif
 
+#include "spdk/config.h"
 #include "spdk/env.h"
 #include "spdk/nvme_spec.h"
 #include "spdk/nvmf_spec.h"
@@ -2038,6 +2039,60 @@ void spdk_nvme_qpair_remove_cmd_error_injection(struct spdk_nvme_ctrlr *ctrlr,
 		struct spdk_nvme_qpair *qpair,
 		uint8_t opc);
 
+#ifdef SPDK_CONFIG_RDMA
+struct ibv_context;
+struct ibv_pd;
+struct ibv_mr;
+
+/**
+ * RDMA Transport Hooks
+ */
+struct spdk_nvme_rdma_hooks {
+	/**
+	 * \brief Get a transport id specific context to be passed to
+	 * the other hooks.
+	 *
+	 * \param trid the transport id
+	 *
+	 * \return ctx to be passed to the other hooks
+	 */
+	void *(*get_ctx)(const struct spdk_nvme_transport_id *trid);
+
+	/**
+	 * \brief Get an InfiniBand Verbs protection domain.
+	 *
+	 * \param ctx Context returned from get_ctx.
+	 * \param verbs InfiniBand Verbs context
+	 *
+	 * \return pd of the nvme ctrlr
+	 */
+	struct ibv_pd *(*get_ibv_pd)(void *ctx, struct ibv_context *verbs);
+
+	/**
+	 * \brief Get an InfiniBand Verbs memory region for a buffer.
+	 *
+	 * \param ctx Context returned from get_ctx.
+	 * \param buf Memory buffer for which an rkey should be returned.
+	 * \param size size of buf
+	 *
+	 * \return InfiniBand remote key (rkey) for this buf
+	 */
+	uint64_t (*get_rkey)(void *ctx, void *buf, size_t size);
+};
+
+/**
+ * \brief Set the global hooks for the RDMA transport, if necessary.
+ *
+ * This call is optional and must be performed prior to probing for
+ * any devices. By default, the RDMA transport will use the ibverbs
+ * library to create protection domains and register memory. This
+ * is a mechanism to override that and use an existing registration.
+ *
+ * \param hooks The hooks to install for the RDMA transport.
+ */
+void spdk_nvme_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks);
+
+#endif
+
 #ifdef __cplusplus
 }
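[Editor's note] The header block above is the entire public surface of the feature. As a usage illustration (not part of this commit, and assuming an RDMA-enabled SPDK build), here is a minimal sketch of an application populating all three hooks to reuse a pre-created protection domain and one pre-registered memory region; the my_* names and the single-MR assumption are hypothetical:

#include <infiniband/verbs.h>
#include "spdk/nvme.h"

/* Hypothetical application state: a PD created up front and an MR that
 * covers all of the application's I/O buffers. */
struct my_rdma_state {
	struct ibv_pd *pd;
	struct ibv_mr *mr;
};

static struct my_rdma_state g_state;

static void *
my_get_ctx(const struct spdk_nvme_transport_id *trid)
{
	/* A per-target context is possible; a single global suffices here. */
	return &g_state;
}

static struct ibv_pd *
my_get_ibv_pd(void *ctx, struct ibv_context *verbs)
{
	/* Hand the transport an existing PD instead of letting it allocate one. */
	return ((struct my_rdma_state *)ctx)->pd;
}

static uint64_t
my_get_rkey(void *ctx, void *buf, size_t size)
{
	/* Assumes [buf, buf + size) lies inside the pre-registered MR. */
	return ((struct my_rdma_state *)ctx)->mr->rkey;
}

static struct spdk_nvme_rdma_hooks g_hooks = {
	.get_ctx	= my_get_ctx,
	.get_ibv_pd	= my_get_ibv_pd,
	.get_rkey	= my_get_rkey,
};

The g_hooks table is handed to spdk_nvme_rdma_init_hooks() before probing; see the note after the final hunk.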
lib/nvme/nvme_rdma.c

@@ -71,6 +71,8 @@ struct spdk_nvmf_cmd {
 	struct spdk_nvme_sgl_descriptor sgl[NVME_RDMA_MAX_SGL_DESCRIPTORS];
 };
 
+struct spdk_nvme_rdma_hooks g_nvme_hooks = {};
+
 /* Mapping from virtual address to ibv_mr pointer for a protection domain */
 struct spdk_nvme_rdma_mr_map {
 	struct ibv_pd *pd;
@@ -82,6 +84,10 @@ struct spdk_nvme_rdma_mr_map {
 /* NVMe RDMA transport extensions for spdk_nvme_ctrlr */
 struct nvme_rdma_ctrlr {
 	struct spdk_nvme_ctrlr ctrlr;
+
+	struct spdk_nvme_rdma_hooks hooks;
+	void *hook_ctx;
+	struct ibv_pd *pd;
 };
 
 /* NVMe RDMA qpair extensions for spdk_nvme_qpair */
@@ -241,6 +247,7 @@ nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
 {
 	int rc;
 	struct ibv_qp_init_attr attr;
+	struct nvme_rdma_ctrlr *rctrlr;
 
 	rqpair->cq = ibv_create_cq(rqpair->cm_id->verbs, rqpair->num_entries * 2, rqpair, NULL, 0);
 	if (!rqpair->cq) {
@@ -248,6 +255,13 @@ nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
 		return -1;
 	}
 
+	rctrlr = nvme_rdma_ctrlr(rqpair->qpair.ctrlr);
+	if (rctrlr->hooks.get_ibv_pd) {
+		rctrlr->pd = rctrlr->hooks.get_ibv_pd(rctrlr->hook_ctx, rqpair->cm_id->verbs);
+	} else {
+		rctrlr->pd = NULL;
+	}
+
 	memset(&attr, 0, sizeof(struct ibv_qp_init_attr));
 	attr.qp_type = IBV_QPT_RC;
 	attr.send_cq = rqpair->cq;
@@ -257,11 +271,12 @@ nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
 	attr.cap.max_send_sge = NVME_RDMA_DEFAULT_TX_SGE;
 	attr.cap.max_recv_sge = NVME_RDMA_DEFAULT_RX_SGE;
 
-	rc = rdma_create_qp(rqpair->cm_id, NULL, &attr);
+	rc = rdma_create_qp(rqpair->cm_id, rctrlr->pd, &attr);
 	if (rc) {
 		SPDK_ERRLOG("rdma_create_qp failed\n");
 		return -1;
 	}
+	rctrlr->pd = rqpair->cm_id->qp->pd;
 
 	rqpair->cm_id->context = &rqpair->qpair;
 
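[Editor's note] The fallback here leans on rdma_create_qp() semantics: when no get_ibv_pd hook is installed, rctrlr->pd stays NULL, and rdma_create_qp() then uses rdmacm's default protection domain for the device. Reading rqpair->cm_id->qp->pd back after the call records whichever PD actually ended up on the queue pair, so the memory-registration path below behaves the same in both modes.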
@@ -611,29 +626,38 @@ nvme_rdma_mr_map_notify(void *cb_ctx, struct spdk_mem_map *map,
 			enum spdk_mem_map_notify_action action,
 			void *vaddr, size_t size)
 {
-	struct ibv_pd *pd = cb_ctx;
+	struct nvme_rdma_ctrlr *rctrlr = cb_ctx;
+	struct ibv_pd *pd;
 	struct ibv_mr *mr;
 	int rc;
 
 	switch (action) {
 	case SPDK_MEM_MAP_NOTIFY_REGISTER:
-		mr = ibv_reg_mr(pd, vaddr, size,
-				IBV_ACCESS_LOCAL_WRITE |
-				IBV_ACCESS_REMOTE_READ |
-				IBV_ACCESS_REMOTE_WRITE);
-		if (mr == NULL) {
-			SPDK_ERRLOG("ibv_reg_mr() failed\n");
-			return -EFAULT;
-		} else {
-			rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr);
+		if (!rctrlr->hooks.get_rkey) {
+			pd = rctrlr->pd;
+			mr = ibv_reg_mr(pd, vaddr, size,
+					IBV_ACCESS_LOCAL_WRITE |
+					IBV_ACCESS_REMOTE_READ |
+					IBV_ACCESS_REMOTE_WRITE);
+			if (mr == NULL) {
+				SPDK_ERRLOG("ibv_reg_mr() failed\n");
+				return -EFAULT;
+			} else {
+				rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr);
+			}
+		} else {
+			rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size,
+							  rctrlr->hooks.get_rkey(rctrlr->hook_ctx, vaddr, size));
 		}
 		break;
 	case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
-		mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL);
-		rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
-		if (mr) {
-			ibv_dereg_mr(mr);
+		if (!rctrlr->hooks.get_rkey) {
+			mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL);
+			if (mr) {
+				ibv_dereg_mr(mr);
+			}
 		}
+		rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
 		break;
 	default:
 		SPDK_UNREACHABLE();
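[Editor's note] The rewritten callback pins down the get_rkey contract: with the hook installed, the transport never calls ibv_reg_mr()/ibv_dereg_mr() itself. It stores the hook's return value in the mem map once, at registration time, and the I/O paths read it back via spdk_mem_map_translate(), so the returned key must be valid for the whole [vaddr, vaddr + size) range. A hypothetical get_rkey resolving buffers against a small table of pre-registered regions (my_region and the lookup are illustrative only):

#include <stdint.h>
#include <stdlib.h>

/* Hypothetical table of regions the application registered up front. */
struct my_region {
	void	*base;
	size_t	 len;
	uint32_t rkey;
};

static struct my_region g_regions[8];
static int g_num_regions;

static uint64_t
my_get_rkey_multi(void *ctx, void *buf, size_t size)
{
	char *p = buf;
	int i;

	for (i = 0; i < g_num_regions; i++) {
		char *base = g_regions[i].base;

		/* The key must cover the entire [buf, buf + size) range. */
		if (p >= base && p + size <= base + g_regions[i].len) {
			return g_regions[i].rkey;
		}
	}

	/* A buffer outside every registration is a setup bug in this sketch. */
	abort();
}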
@@ -673,7 +697,8 @@ nvme_rdma_register_mem(struct nvme_rdma_qpair *rqpair)
 
 	mr_map->ref = 1;
 	mr_map->pd = pd;
-	mr_map->map = spdk_mem_map_alloc((uint64_t)NULL, &nvme_rdma_map_ops, pd);
+	mr_map->map = spdk_mem_map_alloc((uint64_t)NULL, &nvme_rdma_map_ops,
+					 nvme_rdma_ctrlr(rqpair->qpair.ctrlr));
 	if (mr_map->map == NULL) {
 		SPDK_ERRLOG("spdk_mem_map_alloc() failed\n");
 		free(mr_map);
@@ -918,9 +943,21 @@ nvme_rdma_build_contig_request(struct nvme_rdma_qpair *rqpair,
 	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
 
 	requested_size = req->payload_size;
-	mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)payload,
-			&requested_size);
-	if (mr == NULL || requested_size < req->payload_size) {
+	if (!nvme_rdma_ctrlr(rqpair->qpair.ctrlr)->hooks.get_rkey) {
+
+		mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)payload,
+				&requested_size);
+		if (mr == NULL) {
+			return -1;
+		}
+		req->cmd.dptr.sgl1.keyed.key = mr->rkey;
+	} else {
+		req->cmd.dptr.sgl1.keyed.key = spdk_mem_map_translate(rqpair->mr_map->map,
+				(uint64_t)payload,
+				&requested_size);
+	}
+
+	if (requested_size < req->payload_size) {
 		return -1;
 	}
 
@@ -937,7 +974,6 @@ nvme_rdma_build_contig_request(struct nvme_rdma_qpair *rqpair,
 	req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
 	req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
 	req->cmd.dptr.sgl1.keyed.length = req->payload_size;
-	req->cmd.dptr.sgl1.keyed.key = mr->rkey;
 	req->cmd.dptr.sgl1.address = (uint64_t)payload;
 
 	return 0;
@@ -977,17 +1013,27 @@ nvme_rdma_build_sgl_request(struct nvme_rdma_qpair *rqpair,
 		sge_length = spdk_min(remaining_size, sge_length);
 		mr_length = sge_length;
 
-		mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)virt_addr,
-				&mr_length);
+		if (!nvme_rdma_ctrlr(rqpair->qpair.ctrlr)->hooks.get_rkey) {
+			mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map,
+					(uint64_t)virt_addr,
+					&mr_length);
+			if (mr == NULL) {
+				return -1;
+			}
+			cmd->sgl[num_sgl_desc].keyed.key = mr->rkey;
+		} else {
+			cmd->sgl[num_sgl_desc].keyed.key = spdk_mem_map_translate(rqpair->mr_map->map,
+					(uint64_t)virt_addr,
+					&mr_length);
+		}
 
-		if (mr == NULL || mr_length < sge_length) {
+		if (mr_length < sge_length) {
 			return -1;
 		}
 
 		cmd->sgl[num_sgl_desc].keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
 		cmd->sgl[num_sgl_desc].keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
 		cmd->sgl[num_sgl_desc].keyed.length = sge_length;
-		cmd->sgl[num_sgl_desc].keyed.key = mr->rkey;
 		cmd->sgl[num_sgl_desc].address = (uint64_t)virt_addr;
 
 		remaining_size -= sge_length;
@@ -1017,11 +1063,11 @@ nvme_rdma_build_sgl_request(struct nvme_rdma_qpair *rqpair,
 		 * the NVMe command. */
 		rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);
 
-		req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
-		req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
-		req->cmd.dptr.sgl1.keyed.length = req->payload_size;
-		req->cmd.dptr.sgl1.keyed.key = mr->rkey;
-		req->cmd.dptr.sgl1.address = rqpair->cmds[rdma_req->id].sgl[0].address;
+		req->cmd.dptr.sgl1.keyed.type = cmd->sgl[0].keyed.type;
+		req->cmd.dptr.sgl1.keyed.subtype = cmd->sgl[0].keyed.subtype;
+		req->cmd.dptr.sgl1.keyed.length = cmd->sgl[0].keyed.length;
+		req->cmd.dptr.sgl1.keyed.key = cmd->sgl[0].keyed.key;
+		req->cmd.dptr.sgl1.address = cmd->sgl[0].address;
 	} else {
 		/*
 		 * Otherwise, The SGL descriptor embedded in the command must point to the list of
@@ -1363,6 +1409,11 @@ struct spdk_nvme_ctrlr *nvme_rdma_ctrlr_construct(const struct spdk_nvme_transport_id *trid,
 
 	nvme_ctrlr_init_cap(&rctrlr->ctrlr, &cap, &vs);
 
+	if (g_nvme_hooks.get_ctx) {
+		rctrlr->hooks = g_nvme_hooks;
+		rctrlr->hook_ctx = rctrlr->hooks.get_ctx(&rctrlr->ctrlr.trid);
+	}
+
 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "successfully initialized the nvmf ctrlr\n");
 	return &rctrlr->ctrlr;
 }
@@ -1632,3 +1683,9 @@ nvme_rdma_ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, void *buf, size_t size)
 {
 	return 0;
 }
+
+void
+spdk_nvme_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks)
+{
+	g_nvme_hooks = *hooks;
+}
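[Editor's note] Because g_nvme_hooks is copied into the controller in nvme_rdma_ctrlr_construct(), the ordering requirement stated in the header comment is strict: the hooks must be installed before the first probe. A hedged sketch of the call order, reusing the hypothetical g_hooks table from the earlier example (the trid string and no-op callbacks are placeholders):

#include <string.h>
#include "spdk/nvme.h"

static bool
probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	 struct spdk_nvme_ctrlr_opts *opts)
{
	return true;	/* attach to everything discovered */
}

static void
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
{
	/* Queue pairs created from this ctrlr now use the hooked PD/rkeys. */
}

int
connect_with_hooks(void)
{
	struct spdk_nvme_transport_id trid;

	memset(&trid, 0, sizeof(trid));
	if (spdk_nvme_transport_id_parse(&trid,
			"trtype:RDMA adrfam:IPv4 traddr:192.0.2.1 trsvcid:4420") != 0) {
		return -1;
	}

	/* Must precede the probe that constructs the controller. */
	spdk_nvme_rdma_init_hooks(&g_hooks);

	return spdk_nvme_probe(&trid, NULL, probe_cb, attach_cb, NULL);
}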