nvmf: combine tx_desc + rx_desc into rdma_request
Change-Id: I5fa886e0eeed08bd2fb03c237a2a8129c1df4720
Signed-off-by: Daniel Verkamp <daniel.verkamp@intel.com>
parent 996705f1b8
commit 29d4ad597f

lib/nvmf/rdma.c: 511 lines changed
@@ -37,6 +37,7 @@
 #include <rdma/rdma_cma.h>
 #include <rdma/rdma_verbs.h>
 #include <unistd.h>
+#include <stdint.h>
 
 #include <rte_config.h>
 #include <rte_debug.h>
@@ -49,6 +50,7 @@
 #include "nvmf.h"
 #include "port.h"
 #include "host.h"
+#include "spdk/assert.h"
 #include "spdk/log.h"
 #include "spdk/trace.h"
 
@@ -62,9 +64,6 @@
 #define NVMF_DEFAULT_TX_SGE		1
 #define NVMF_DEFAULT_RX_SGE		2
 
-static int alloc_qp_rx_desc(struct spdk_nvmf_conn *conn);
-static int alloc_qp_tx_desc(struct spdk_nvmf_conn *conn);
-
 struct spdk_nvmf_rdma {
 	struct rte_timer		acceptor_timer;
 	struct rdma_event_channel	*acceptor_event_channel;
@@ -73,30 +72,11 @@ struct spdk_nvmf_rdma {
 
 static struct spdk_nvmf_rdma g_rdma = { };
 
-static void
-nvmf_active_tx_desc(struct nvme_qp_tx_desc *tx_desc)
-{
-	struct spdk_nvmf_conn *conn;
-
-	RTE_VERIFY(tx_desc != NULL);
-	conn = tx_desc->conn;
-	RTE_VERIFY(conn != NULL);
-
-	STAILQ_REMOVE(&conn->rdma.qp_tx_desc, tx_desc, nvme_qp_tx_desc, link);
-	STAILQ_INSERT_TAIL(&conn->rdma.qp_tx_active_desc, tx_desc, link);
-}
-
-static void
-nvmf_deactive_tx_desc(struct nvme_qp_tx_desc *tx_desc)
-{
-	struct spdk_nvmf_conn *conn;
-
-	RTE_VERIFY(tx_desc != NULL);
-	conn = tx_desc->conn;
-	RTE_VERIFY(tx_desc->conn != NULL);
-
-	STAILQ_REMOVE(&conn->rdma.qp_tx_active_desc, tx_desc, nvme_qp_tx_desc, link);
-	STAILQ_INSERT_TAIL(&conn->rdma.qp_tx_desc, tx_desc, link);
-}
+static inline struct spdk_nvmf_rdma_request *
+get_rdma_req(struct spdk_nvmf_request *req)
+{
+	return (struct spdk_nvmf_rdma_request *)((uintptr_t)req - offsetof(struct spdk_nvmf_rdma_request,
+			req));
+}
 
 static int
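The new get_rdma_req() is the classic container-of idiom: the generic spdk_nvmf_request is embedded inside the transport-private spdk_nvmf_rdma_request, so the RDMA layer can recover its own state from the generic request pointer by subtracting the member's offset. A minimal standalone sketch of the pattern, using stand-in struct names rather than the SPDK definitions:

/* Container-of sketch: recover the outer struct from a pointer to an
 * embedded member by subtracting the member's offset. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct request {
	int opcode;
};

struct rdma_request {
	struct request req;   /* embedded generic request */
	int transport_state;  /* transport-private data */
};

static inline struct rdma_request *
get_rdma_request(struct request *req)
{
	return (struct rdma_request *)((uintptr_t)req - offsetof(struct rdma_request, req));
}

int main(void)
{
	struct rdma_request rdma_req = { .req = { .opcode = 42 }, .transport_state = 7 };
	struct request *req = &rdma_req.req;

	/* Round-trip: embedded member pointer back to its container. */
	assert(get_rdma_request(req) == &rdma_req);
	printf("state = %d\n", get_rdma_request(req)->transport_state);
	return 0;
}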
@@ -159,41 +139,32 @@ return_error:
 }
 
 static void
-free_qp_desc(struct spdk_nvmf_conn *conn)
+free_rdma_req(struct spdk_nvmf_rdma_request *rdma_req)
 {
-	struct nvme_qp_rx_desc *tmp_rx;
-	struct nvme_qp_tx_desc *tmp_tx;
-	int rc;
-
-	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Enter\n");
-
-	STAILQ_FOREACH(tmp_rx, &conn->rdma.qp_rx_desc, link) {
-		STAILQ_REMOVE(&conn->rdma.qp_rx_desc, tmp_rx, nvme_qp_rx_desc, link);
-
-		rc = rdma_dereg_mr(tmp_rx->bb_mr);
-		if (rc) {
-			SPDK_ERRLOG("Unable to de-register rx bb mr\n");
-		}
-
-		rte_free(tmp_rx->bb);
-
-		rc = rdma_dereg_mr(tmp_rx->cmd_mr);
-		if (rc) {
-			SPDK_ERRLOG("Unable to de-register rx mr\n");
-		}
-
-		rte_free(tmp_rx);
+	if (rdma_req->cmd_mr && rdma_dereg_mr(rdma_req->cmd_mr)) {
+		SPDK_ERRLOG("Unable to de-register cmd_mr\n");
 	}
 
-	STAILQ_FOREACH(tmp_tx, &conn->rdma.qp_tx_desc, link) {
-		STAILQ_REMOVE(&conn->rdma.qp_tx_desc, tmp_tx, nvme_qp_tx_desc, link);
+	if (rdma_req->rsp_mr && rdma_dereg_mr(rdma_req->rsp_mr)) {
+		SPDK_ERRLOG("Unable to de-register rsp_mr\n");
+	}
 
-		rc = rdma_dereg_mr(tmp_tx->rsp_mr);
-		if (rc) {
-			SPDK_ERRLOG("Unable to de-register tx mr\n");
-		}
+	if (rdma_req->bb_mr && rdma_dereg_mr(rdma_req->bb_mr)) {
+		SPDK_ERRLOG("Unable to de-register bb_mr\n");
+	}
 
-		rte_free(tmp_tx);
+	rte_free(rdma_req->bb);
+	rte_free(rdma_req);
+}
+
+static void
+free_rdma_reqs(struct spdk_nvmf_conn *conn)
+{
+	struct spdk_nvmf_rdma_request *rdma_req;
+
+	STAILQ_FOREACH(rdma_req, &conn->rdma.rdma_reqs, link) {
+		STAILQ_REMOVE(&conn->rdma.rdma_reqs, rdma_req, spdk_nvmf_rdma_request, link);
+		free_rdma_req(rdma_req);
 	}
 }
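A general note on the list-teardown idiom here: STAILQ_FOREACH reads the current element's link field to advance, so removing and freeing the current element inside the loop body is a use-after-free hazard; popping from the head until the list is empty is the safer shape. A minimal sketch of that drain pattern, independent of SPDK:

/* Draining a <sys/queue.h> STAILQ when each element is freed as it is
 * removed: always take the head, unlink it, then free it. */
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct node {
	int id;
	STAILQ_ENTRY(node) link;
};

STAILQ_HEAD(node_list, node);

int main(void)
{
	struct node_list list = STAILQ_HEAD_INITIALIZER(list);
	struct node *n;
	int i;

	for (i = 0; i < 4; i++) {
		n = calloc(1, sizeof(*n));
		n->id = i;
		STAILQ_INSERT_TAIL(&list, n, link);
	}

	/* Drain from the head; never touch n after free(). */
	while (!STAILQ_EMPTY(&list)) {
		n = STAILQ_FIRST(&list);
		STAILQ_REMOVE_HEAD(&list, link);
		printf("freeing node %d\n", n->id);
		free(n);
	}
	return 0;
}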
@@ -213,27 +184,20 @@ nvmf_drain_cq(struct spdk_nvmf_conn *conn)
 void
 nvmf_rdma_conn_cleanup(struct spdk_nvmf_conn *conn)
 {
-	struct nvme_qp_tx_desc *pending_desc, *active_desc;
+	struct spdk_nvmf_rdma_request *rdma_req;
 	int rc;
 
 	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Enter\n");
 
 	rdma_destroy_qp(conn->rdma.cm_id);
 
-	while (!STAILQ_EMPTY(&conn->rdma.qp_pending_desc)) {
-		pending_desc = STAILQ_FIRST(&conn->rdma.qp_pending_desc);
-		STAILQ_REMOVE_HEAD(&conn->rdma.qp_pending_desc, link);
-		STAILQ_INSERT_TAIL(&conn->rdma.qp_tx_desc, pending_desc, link);
-	}
-
-	/* Remove tx_desc from qp_tx_active_desc list to qp_tx_desc list */
-	while (!STAILQ_EMPTY(&conn->rdma.qp_tx_active_desc)) {
-		active_desc = STAILQ_FIRST(&conn->rdma.qp_tx_active_desc);
-		STAILQ_REMOVE_HEAD(&conn->rdma.qp_tx_active_desc, link);
-		STAILQ_INSERT_TAIL(&conn->rdma.qp_tx_desc, active_desc, link);
+	while (!STAILQ_EMPTY(&conn->rdma.pending_rdma_reqs)) {
+		rdma_req = STAILQ_FIRST(&conn->rdma.pending_rdma_reqs);
+		STAILQ_REMOVE_HEAD(&conn->rdma.pending_rdma_reqs, link);
+		STAILQ_INSERT_TAIL(&conn->rdma.rdma_reqs, rdma_req, link);
 	}
 
-	free_qp_desc(conn);
+	free_rdma_reqs(conn);
 
 	nvmf_drain_cq(conn);
 	rc = ibv_destroy_cq(conn->rdma.cq);
@@ -289,33 +253,26 @@ nvmf_post_rdma_read(struct spdk_nvmf_conn *conn,
 		    struct spdk_nvmf_request *req)
 {
 	struct ibv_send_wr wr, *bad_wr = NULL;
-	struct nvme_qp_tx_desc *tx_desc = req->tx_desc;
-	struct nvme_qp_rx_desc *rx_desc = req->rx_desc;
-
+	struct spdk_nvmf_rdma_request *rdma_req = get_rdma_req(req);
 	int rc;
 
-	if (rx_desc == NULL) {
-		SPDK_ERRLOG("Rx descriptor does not exist at rdma read!\n");
-		return -1;
-	}
-
 	/*
 	 * Queue the rdma read if it would exceed max outstanding
 	 * RDMA read limit.
 	 */
 	if (conn->rdma.pending_rdma_read_count == conn->rdma.initiator_depth) {
-		SPDK_TRACELOG(SPDK_TRACE_RDMA, "Insert rdma read into pending queue: tx_desc %p\n",
-			      tx_desc);
-		STAILQ_REMOVE(&conn->rdma.qp_tx_active_desc, tx_desc, nvme_qp_tx_desc, link);
-		STAILQ_INSERT_TAIL(&conn->rdma.qp_pending_desc, tx_desc, link);
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "Insert rdma read into pending queue: rdma_req %p\n",
+			      rdma_req);
+		STAILQ_REMOVE(&conn->rdma.rdma_reqs, rdma_req, spdk_nvmf_rdma_request, link);
+		STAILQ_INSERT_TAIL(&conn->rdma.pending_rdma_reqs, rdma_req, link);
 		return 0;
 	}
 	conn->rdma.pending_rdma_read_count++;
 
 	/* temporarily adjust SGE to only copy what the host is prepared to send. */
-	rx_desc->bb_sgl.length = req->length;
+	rdma_req->bb_sgl.length = req->length;
 
-	nvmf_ibv_send_wr_init(&wr, req, &rx_desc->bb_sgl, (uint64_t)tx_desc,
+	nvmf_ibv_send_wr_init(&wr, req, &rdma_req->bb_sgl, (uint64_t)rdma_req,
 			      IBV_WR_RDMA_READ, IBV_SEND_SIGNALED);
 
 	spdk_trace_record(TRACE_RDMA_READ_START, 0, 0, (uint64_t)req, 0);
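This hunk preserves the existing flow control: at most initiator_depth RDMA reads may be outstanding per connection, and requests beyond that limit park on pending_rdma_reqs until a read completion frees a slot (see nvmf_process_pending_rdma later in the diff). A toy standalone model of that queueing policy, with illustrative names only:

/* Toy model of the read flow control: at most `depth` reads outstanding;
 * excess requests wait on a pending queue and are resubmitted as
 * completions arrive. */
#include <stdio.h>
#include <sys/queue.h>

struct io { int id; STAILQ_ENTRY(io) link; };
STAILQ_HEAD(io_list, io);

static int outstanding;
static const int depth = 2;           /* stands in for initiator_depth */
static struct io_list pending = STAILQ_HEAD_INITIALIZER(pending);

static void submit_read(struct io *io)
{
	if (outstanding == depth) {   /* limit reached: park the request */
		STAILQ_INSERT_TAIL(&pending, io, link);
		printf("io %d queued\n", io->id);
		return;
	}
	outstanding++;
	printf("io %d posted (outstanding=%d)\n", io->id, outstanding);
}

static void read_completed(void)
{
	outstanding--;                /* slot freed: resubmit one waiter */
	if (!STAILQ_EMPTY(&pending)) {
		struct io *io = STAILQ_FIRST(&pending);
		STAILQ_REMOVE_HEAD(&pending, link);
		submit_read(io);
	}
}

int main(void)
{
	struct io a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };
	submit_read(&a);
	submit_read(&b);
	submit_read(&c);              /* exceeds depth, goes pending */
	read_completed();             /* frees a slot; io 3 gets posted */
	return 0;
}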
@@ -331,19 +288,13 @@ nvmf_post_rdma_write(struct spdk_nvmf_conn *conn,
 		     struct spdk_nvmf_request *req)
 {
 	struct ibv_send_wr wr, *bad_wr = NULL;
-	struct nvme_qp_tx_desc *tx_desc = req->tx_desc;
-	struct nvme_qp_rx_desc *rx_desc = req->rx_desc;
+	struct spdk_nvmf_rdma_request *rdma_req = get_rdma_req(req);
 	int rc;
 
-	if (rx_desc == NULL) {
-		SPDK_ERRLOG("Rx descriptor does not exist at rdma write!\n");
-		return -1;
-	}
-
 	/* temporarily adjust SGE to only copy what the host is prepared to receive. */
-	rx_desc->bb_sgl.length = req->length;
+	rdma_req->bb_sgl.length = req->length;
 
-	nvmf_ibv_send_wr_init(&wr, req, &rx_desc->bb_sgl, (uint64_t)tx_desc,
+	nvmf_ibv_send_wr_init(&wr, req, &rdma_req->bb_sgl, (uint64_t)rdma_req,
 			      IBV_WR_RDMA_WRITE, 0);
 
 	spdk_trace_record(TRACE_RDMA_WRITE_START, 0, 0, (uint64_t)req, 0);
@@ -356,7 +307,7 @@ nvmf_post_rdma_write(struct spdk_nvmf_conn *conn,
 
 static int
 nvmf_post_rdma_recv(struct spdk_nvmf_conn *conn,
-		    struct nvme_qp_rx_desc *rx_desc)
+		    struct spdk_nvmf_rdma_request *rdma_req)
 {
 	struct ibv_recv_wr wr, *bad_wr = NULL;
 	int rc;
@@ -368,20 +319,20 @@ nvmf_post_rdma_recv(struct spdk_nvmf_conn *conn,
 	conn->sq_head < (conn->rdma.sq_depth - 1) ? (conn->sq_head++) : (conn->sq_head = 0);
 	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "sq_head %x, sq_depth %x\n", conn->sq_head, conn->rdma.sq_depth);
 
-	wr.wr_id = (uintptr_t)rx_desc;
+	wr.wr_id = (uintptr_t)rdma_req;
 	wr.next = NULL;
-	wr.sg_list = &rx_desc->recv_sgl;
+	wr.sg_list = &rdma_req->recv_sgl;
 	wr.num_sge = 1;
 
-	nvmf_trace_ibv_sge(&rx_desc->recv_sgl);
+	nvmf_trace_ibv_sge(&rdma_req->recv_sgl);
 
 	/* for I/O queues we add bb sgl for in-capsule data use */
 	if (conn->type == CONN_TYPE_IOQ) {
 		wr.num_sge = 2;
 		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "sgl2 local addr %p\n",
-			      (void *)rx_desc->bb_sgl.addr);
-		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "sgl2 length %x\n", rx_desc->bb_sgl.length);
-		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "sgl2 lkey %x\n", rx_desc->bb_sgl.lkey);
+			      (void *)rdma_req->bb_sgl.addr);
+		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "sgl2 length %x\n", rdma_req->bb_sgl.length);
+		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "sgl2 lkey %x\n", rdma_req->bb_sgl.lkey);
 	}
 
 	rc = ibv_post_recv(conn->rdma.qp, &wr, &bad_wr);
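The receive work request depends on struct layout: wr.sg_list points at recv_sgl, and for I/O queues num_sge is raised to 2 so the HCA also scatters any in-capsule data into the bounce buffer through bb_sgl, which must therefore sit immediately after recv_sgl in memory. A sketch of the same two-SGE post against the libibverbs API, assuming an already-established QP and already-registered buffers:

/* Two-SGE receive sketch: first SGE catches the command capsule, second
 * catches in-capsule data. The two ibv_sge structs must be adjacent,
 * since wr.sg_list treats them as one array. */
#include <stdint.h>
#include <infiniband/verbs.h>

struct recv_ctx {
	struct ibv_sge recv_sgl;
	struct ibv_sge bb_sgl;   /* must follow recv_sgl */
};

static int post_capsule_recv(struct ibv_qp *qp, struct recv_ctx *ctx)
{
	struct ibv_recv_wr wr, *bad_wr = NULL;

	wr.wr_id = (uintptr_t)ctx;    /* recovered from the completion later */
	wr.next = NULL;
	wr.sg_list = &ctx->recv_sgl;  /* SGE array: recv_sgl, then bb_sgl */
	wr.num_sge = 2;

	return ibv_post_recv(qp, &wr, &bad_wr);
}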
@@ -396,26 +347,23 @@ nvmf_post_rdma_send(struct spdk_nvmf_conn *conn,
 		    struct spdk_nvmf_request *req)
 {
 	struct ibv_send_wr wr, *bad_wr = NULL;
-	struct nvme_qp_tx_desc *tx_desc = req->tx_desc;
-	struct nvme_qp_rx_desc *rx_desc = req->rx_desc;
+	struct spdk_nvmf_rdma_request *rdma_req = get_rdma_req(req);
 	int rc;
 
-	RTE_VERIFY(rx_desc != NULL);
-
 	/* restore the SGL length that may have been modified */
-	rx_desc->bb_sgl.length = rx_desc->bb_len;
+	rdma_req->bb_sgl.length = rdma_req->bb_len;
 
 	/* Re-post recv */
-	if (nvmf_post_rdma_recv(conn, rx_desc)) {
+	if (nvmf_post_rdma_recv(conn, rdma_req)) {
 		SPDK_ERRLOG("Unable to re-post rx descriptor\n");
 		return -1;
 	}
 
-	nvmf_ibv_send_wr_init(&wr, NULL, &tx_desc->send_sgl, (uint64_t)tx_desc,
+	nvmf_ibv_send_wr_init(&wr, NULL, &rdma_req->send_sgl, (uint64_t)rdma_req,
 			      IBV_WR_SEND, IBV_SEND_SIGNALED);
 
-	SPDK_TRACELOG(SPDK_TRACE_RDMA, "tx_desc %p: req %p, rsp %p\n",
-		      tx_desc, req, req->rsp);
+	SPDK_TRACELOG(SPDK_TRACE_RDMA, "rdma_req %p: req %p, rsp %p\n",
+		      rdma_req, req, req->rsp);
 
 	spdk_trace_record(TRACE_NVMF_IO_COMPLETE, 0, 0, (uint64_t)req, 0);
 	rc = ibv_post_send(conn->rdma.qp, &wr, &bad_wr);
@@ -451,6 +399,99 @@ spdk_nvmf_rdma_request_complete(struct spdk_nvmf_conn *conn, struct spdk_nvmf_re
 	return 0;
 }
 
+static int
+alloc_rdma_reqs(struct spdk_nvmf_conn *conn)
+{
+	struct spdk_nvmf_rdma_request *rdma_req;
+	int i;
+
+	for (i = 0; i < conn->rdma.sq_depth; i++) {
+		rdma_req = rte_zmalloc("nvmf_rdma_req", sizeof(*rdma_req), 0);
+		if (!rdma_req) {
+			SPDK_ERRLOG("Unable to get rdma_req\n");
+			goto fail;
+		}
+
+		rdma_req->cmd_mr = rdma_reg_msgs(conn->rdma.cm_id, &rdma_req->cmd, sizeof(rdma_req->cmd));
+		if (rdma_req->cmd_mr == NULL) {
+			SPDK_ERRLOG("Unable to register cmd_mr\n");
+			goto fail;
+		}
+
+		/* initialize recv_sgl */
+		rdma_req->recv_sgl.addr = (uint64_t)&rdma_req->cmd;
+		rdma_req->recv_sgl.length = sizeof(rdma_req->cmd);
+		rdma_req->recv_sgl.lkey = rdma_req->cmd_mr->lkey;
+
+		/* pre-assign a data bb (bounce buffer) with each RX descriptor */
+		/*
+		  For admin queue, assign smaller BB size to support maximum data that
+		  would be exchanged related to admin commands.  For IO queue, assign
+		  the large BB size that is equal to the maximum I/O transfer supported
+		  by the NVMe device. This large BB is also used for in-capsule receive
+		  data.
+		*/
+		if (conn->type == CONN_TYPE_AQ) {
+			rdma_req->bb_len = SMALL_BB_MAX_SIZE;
+		} else { // for IO queues
+			rdma_req->bb_len = LARGE_BB_MAX_SIZE;
+		}
+		rdma_req->bb = rte_zmalloc("nvmf_bb", rdma_req->bb_len, 0);
+		if (!rdma_req->bb) {
+			SPDK_ERRLOG("Unable to get %u-byte bounce buffer\n", rdma_req->bb_len);
+			goto fail;
+		}
+		rdma_req->bb_mr = rdma_reg_read(conn->rdma.cm_id,
+						(void *)rdma_req->bb,
+						rdma_req->bb_len);
+		if (rdma_req->bb_mr == NULL) {
+			SPDK_ERRLOG("Unable to register bb_mr\n");
+			goto fail;
+		}
+
+		/* initialize bb_sgl */
+		rdma_req->bb_sgl.addr = (uint64_t)rdma_req->bb;
+		rdma_req->bb_sgl.length = rdma_req->bb_len;
+		rdma_req->bb_sgl.lkey = rdma_req->bb_mr->lkey;
+
+		rdma_req->rsp_mr = rdma_reg_msgs(conn->rdma.cm_id, &rdma_req->rsp, sizeof(rdma_req->rsp));
+		if (rdma_req->rsp_mr == NULL) {
+			SPDK_ERRLOG("Unable to register rsp_mr\n");
+			goto fail;
+		}
+
+		/* initialize send_sgl */
+		rdma_req->send_sgl.addr = (uint64_t)&rdma_req->rsp;
+		rdma_req->send_sgl.length = sizeof(rdma_req->rsp);
+		rdma_req->send_sgl.lkey = rdma_req->rsp_mr->lkey;
+
+		rdma_req->req.cmd = &rdma_req->cmd;
+		rdma_req->req.rsp = &rdma_req->rsp;
+		rdma_req->req.conn = conn;
+
+		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "rdma_req %p: req %p, rsp %p\n",
+			      rdma_req, &rdma_req->req,
+			      rdma_req->req.rsp);
+
+		STAILQ_INSERT_TAIL(&conn->rdma.rdma_reqs, rdma_req, link);
+	}
+
+	return 0;
+
+fail:
+	/* cleanup any partial rdma_req that failed during init loop */
+	if (rdma_req != NULL) {
+		free_rdma_req(rdma_req);
+	}
+
+	STAILQ_FOREACH(rdma_req, &conn->rdma.rdma_reqs, link) {
+		STAILQ_REMOVE(&conn->rdma.rdma_reqs, rdma_req, spdk_nvmf_rdma_request, link);
+		free_rdma_req(rdma_req);
+	}
+
+	return -ENOMEM;
+}
+
 static int
 nvmf_rdma_connect(struct rdma_cm_event *event)
 {
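alloc_rdma_reqs() acquires several resources per request (the request object, two message MRs, the bounce buffer and its MR) and unwinds through a single fail label, relying on free_rdma_req() tolerating a partially constructed object via its NULL checks. A generic sketch of that goto-fail pattern, using plain malloc/free in place of the DPDK and RDMA calls:

/* Goto-fail unwind: each acquisition jumps to one cleanup label on error,
 * and the teardown helper is safe on a half-constructed object. */
#include <stdlib.h>

struct obj {
	void *buf_a;
	void *buf_b;
};

static void obj_free(struct obj *obj)
{
	if (obj == NULL) {
		return;
	}
	/* Each member may or may not have been allocated before failure;
	 * calloc zeroed them, and free(NULL) is a no-op. */
	free(obj->buf_a);
	free(obj->buf_b);
	free(obj);
}

static struct obj *obj_alloc(size_t a_len, size_t b_len)
{
	struct obj *obj = calloc(1, sizeof(*obj));

	if (obj == NULL) {
		goto fail;
	}
	obj->buf_a = malloc(a_len);
	if (obj->buf_a == NULL) {
		goto fail;
	}
	obj->buf_b = malloc(b_len);
	if (obj->buf_b == NULL) {
		goto fail;
	}
	return obj;

fail:
	obj_free(obj);   /* safe on partially initialized obj */
	return NULL;
}

int main(void)
{
	struct obj *obj = obj_alloc(64, 128);
	obj_free(obj);
	return 0;
}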
@@ -458,7 +499,7 @@ nvmf_rdma_connect(struct rdma_cm_event *event)
 	struct spdk_nvmf_fabric_intf	*fabric_intf;
 	struct rdma_cm_id		*conn_id;
 	struct spdk_nvmf_conn		*conn;
-	struct nvme_qp_rx_desc		*rx_desc;
+	struct spdk_nvmf_rdma_request	*rdma_req;
 	struct ibv_device_attr		ibdev_attr;
 	struct sockaddr_in		*addr;
 	struct rdma_conn_param		*param = NULL;
@@ -599,30 +640,20 @@ nvmf_rdma_connect(struct rdma_cm_event *event)
 	}
 	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "NVMf fabric connection initialized\n");
 
-	STAILQ_INIT(&conn->rdma.qp_pending_desc);
-	STAILQ_INIT(&conn->rdma.qp_rx_desc);
-	STAILQ_INIT(&conn->rdma.qp_tx_desc);
-	STAILQ_INIT(&conn->rdma.qp_tx_active_desc);
+	STAILQ_INIT(&conn->rdma.pending_rdma_reqs);
+	STAILQ_INIT(&conn->rdma.rdma_reqs);
 
-	/* Allocate AQ QP RX Buffers */
-	rc = alloc_qp_rx_desc(conn);
+	/* Allocate Buffers */
+	rc = alloc_rdma_reqs(conn);
 	if (rc) {
-		SPDK_ERRLOG("Unable to allocate connection rx desc\n");
+		SPDK_ERRLOG("Unable to allocate connection RDMA requests\n");
 		goto err2;
 	}
-	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Rx buffers allocated\n");
-
-	/* Allocate AQ QP TX Buffers */
-	rc = alloc_qp_tx_desc(conn);
-	if (rc) {
-		SPDK_ERRLOG("Unable to allocate connection tx desc\n");
-		goto err2;
-	}
-	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Tx buffers allocated\n");
+	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "RDMA requests allocated\n");
 
 	/* Post all the RX descriptors */
-	STAILQ_FOREACH(rx_desc, &conn->rdma.qp_rx_desc, link) {
-		if (nvmf_post_rdma_recv(conn, rx_desc)) {
+	STAILQ_FOREACH(rdma_req, &conn->rdma.rdma_reqs, link) {
+		if (nvmf_post_rdma_recv(conn, rdma_req)) {
 			SPDK_ERRLOG("Unable to post connection rx desc\n");
 			goto err2;
 		}
@@ -927,197 +958,22 @@ nvmf_rdma_init(void)
 	return num_devices_found;
 }
 
-/* Populate the AQ QP Rx Buffer Resources */
-static int
-alloc_qp_rx_desc(struct spdk_nvmf_conn *conn)
-{
-	struct nvme_qp_rx_desc *rx_desc, *tmp;
-	int i;
-	int rc;
-
-	/* Allocate buffer for rx descriptors (RX WQE + Msg Buffer) */
-	for (i = 0; i < conn->rdma.sq_depth; i++) {
-		rx_desc = rte_zmalloc("nvmf_rx_desc", sizeof(*rx_desc), 0);
-		if (!rx_desc) {
-			SPDK_ERRLOG("Unable to get rx desc object\n");
-			goto fail;
-		}
-
-		rx_desc->cmd_mr = rdma_reg_msgs(conn->rdma.cm_id, &rx_desc->cmd, sizeof(rx_desc->cmd));
-		if (rx_desc->cmd_mr == NULL) {
-			SPDK_ERRLOG("Unable to register rx desc buffer mr\n");
-			goto fail;
-		}
-
-		rx_desc->conn = conn;
-
-		/* initialize recv_sgl of tx_desc */
-		rx_desc->recv_sgl.addr = (uint64_t)&rx_desc->cmd;
-		rx_desc->recv_sgl.length = sizeof(rx_desc->cmd);
-		rx_desc->recv_sgl.lkey = rx_desc->cmd_mr->lkey;
-
-		/* pre-assign a data bb (bounce buffer) with each RX descriptor */
-		/*
-		  For admin queue, assign smaller BB size to support maximum data that
-		  would be exchanged related to admin commands.  For IO queue, assign
-		  the large BB size that is equal to the maximum I/O transfer supported
-		  by the NVMe device. This large BB is also used for in-capsule receive
-		  data.
-		*/
-		if (conn->type == CONN_TYPE_AQ) {
-			rx_desc->bb_len = SMALL_BB_MAX_SIZE;
-		} else { // for IO queues
-			rx_desc->bb_len = LARGE_BB_MAX_SIZE;
-		}
-		rx_desc->bb = rte_zmalloc("nvmf_bb", rx_desc->bb_len, 0);
-		if (!rx_desc->bb) {
-			SPDK_ERRLOG("Unable to get %u-byte bounce buffer\n", rx_desc->bb_len);
-			goto fail;
-		}
-		rx_desc->bb_mr = rdma_reg_read(conn->rdma.cm_id,
-					       (void *)rx_desc->bb,
-					       rx_desc->bb_len);
-		if (rx_desc->bb_mr == NULL) {
-			SPDK_ERRLOG("Unable to register rx bb mr\n");
-			goto fail;
-		}
-
-		/* initialize bb_sgl of rx_desc */
-		rx_desc->bb_sgl.addr = (uint64_t)rx_desc->bb;
-		rx_desc->bb_sgl.length = rx_desc->bb_len;
-		rx_desc->bb_sgl.lkey = rx_desc->bb_mr->lkey;
-
-		STAILQ_INSERT_TAIL(&conn->rdma.qp_rx_desc, rx_desc, link);
-	}
-
-	return 0;
-
-fail:
-	/* cleanup any partial descriptor that failed during init loop */
-	if (rx_desc != NULL) {
-		if (rx_desc->bb_mr) {
-			rc = rdma_dereg_mr(rx_desc->bb_mr);
-			if (rc) {
-				SPDK_ERRLOG("Unable to de-register rx bb mr\n");
-			}
-		}
-
-		rte_free(rx_desc->bb);
-
-		if (rx_desc->cmd_mr) {
-			rc = rdma_dereg_mr(rx_desc->cmd_mr);
-			if (rc) {
-				SPDK_ERRLOG("Unable to de-register rx mr\n");
-			}
-		}
-
-		rte_free(rx_desc);
-	}
-
-	STAILQ_FOREACH(tmp, &conn->rdma.qp_rx_desc, link) {
-		STAILQ_REMOVE(&conn->rdma.qp_rx_desc, tmp, nvme_qp_rx_desc, link);
-
-		rc = rdma_dereg_mr(tmp->bb_mr);
-		if (rc) {
-			SPDK_ERRLOG("Unable to de-register rx bb mr\n");
-		}
-
-		rte_free(tmp->bb);
-
-		rc = rdma_dereg_mr(tmp->cmd_mr);
-		if (rc) {
-			SPDK_ERRLOG("Unable to de-register rx mr\n");
-		}
-
-		rte_free(tmp);
-	}
-
-	return -ENOMEM;
-}
-
-/* Allocate the AQ QP Tx Buffer Resources */
-static int
-alloc_qp_tx_desc(struct spdk_nvmf_conn *conn)
-{
-	struct nvme_qp_tx_desc *tx_desc, *tmp;
-	int i;
-	int rc;
-
-	/* Initialize the tx descriptors */
-	for (i = 0; i < conn->rdma.cq_depth; i++) {
-		tx_desc = rte_zmalloc("nvmf_tx_desc", sizeof(*tx_desc), 0);
-		if (!tx_desc) {
-			SPDK_ERRLOG("Unable to get tx desc object\n");
-			goto fail;
-		}
-
-		tx_desc->rsp_mr = rdma_reg_msgs(conn->rdma.cm_id, &tx_desc->rsp, sizeof(tx_desc->rsp));
-		if (tx_desc->rsp_mr == NULL) {
-			SPDK_ERRLOG("Unable to register tx desc buffer mr\n");
-			goto fail;
-		}
-
-		tx_desc->conn = conn;
-
-		/* initialize send_sgl of tx_desc */
-		tx_desc->send_sgl.addr = (uint64_t)&tx_desc->rsp;
-		tx_desc->send_sgl.length = sizeof(tx_desc->rsp);
-		tx_desc->send_sgl.lkey = tx_desc->rsp_mr->lkey;
-
-		/* init request state associated with each tx_desc */
-		tx_desc->req.rsp = &tx_desc->rsp;
-		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "tx_desc %p: req %p, rsp %p\n",
-			      tx_desc, &tx_desc->req,
-			      tx_desc->req.rsp);
-
-		STAILQ_INSERT_TAIL(&conn->rdma.qp_tx_desc, tx_desc, link);
-	}
-
-	return 0;
-fail:
-	/* cleanup any partial descriptor that failed during init loop */
-	if (tx_desc != NULL) {
-		if (tx_desc->rsp_mr) {
-			rc = rdma_dereg_mr(tx_desc->rsp_mr);
-			if (rc) {
-				SPDK_ERRLOG("Unable to de-register tx mr\n");
-			}
-		}
-
-		rte_free(tx_desc);
-	}
-
-	STAILQ_FOREACH(tmp, &conn->rdma.qp_tx_desc, link) {
-		STAILQ_REMOVE(&conn->rdma.qp_tx_desc, tmp, nvme_qp_tx_desc, link);
-
-		rc = rdma_dereg_mr(tmp->rsp_mr);
-		if (rc) {
-			SPDK_ERRLOG("Unable to de-register tx mr\n");
-		}
-
-		rte_free(tmp);
-	}
-
-	return -ENOMEM;
-}
-
 static int
 nvmf_process_pending_rdma(struct spdk_nvmf_conn *conn)
 {
-	struct nvme_qp_tx_desc *tx_desc;
+	struct spdk_nvmf_rdma_request *rdma_req;
 	int rc;
 
 	conn->rdma.pending_rdma_read_count--;
-	if (!STAILQ_EMPTY(&conn->rdma.qp_pending_desc)) {
-		tx_desc = STAILQ_FIRST(&conn->rdma.qp_pending_desc);
-		STAILQ_REMOVE_HEAD(&conn->rdma.qp_pending_desc, link);
-		STAILQ_INSERT_TAIL(&conn->rdma.qp_tx_active_desc, tx_desc, link);
+	if (!STAILQ_EMPTY(&conn->rdma.pending_rdma_reqs)) {
+		rdma_req = STAILQ_FIRST(&conn->rdma.pending_rdma_reqs);
+		STAILQ_REMOVE_HEAD(&conn->rdma.pending_rdma_reqs, link);
+		STAILQ_INSERT_TAIL(&conn->rdma.rdma_reqs, rdma_req, link);
 
-		SPDK_TRACELOG(SPDK_TRACE_RDMA, "Issue rdma read from pending queue: tx_desc %p\n",
-			      tx_desc);
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "Issue rdma read from pending queue: rdma_req %p\n",
+			      rdma_req);
 
-		rc = nvmf_post_rdma_read(conn, &tx_desc->req);
+		rc = nvmf_post_rdma_read(conn, &rdma_req->req);
 		if (rc) {
 			SPDK_ERRLOG("Unable to post pending rdma read descriptor\n");
 			return -1;
@@ -1131,12 +987,11 @@ nvmf_process_pending_rdma(struct spdk_nvmf_conn *conn)
 static int
 nvmf_recv(struct spdk_nvmf_conn *conn, struct ibv_wc *wc)
 {
-	struct nvme_qp_rx_desc *rx_desc;
-	struct nvme_qp_tx_desc *tx_desc;
+	struct spdk_nvmf_rdma_request *rdma_req;
 	struct spdk_nvmf_request *req;
 	int ret;
 
-	rx_desc = (struct nvme_qp_rx_desc *)wc->wr_id;
+	rdma_req = (struct spdk_nvmf_rdma_request *)wc->wr_id;
 
 	if (wc->byte_len < sizeof(struct spdk_nvmf_capsule_cmd)) {
 		SPDK_ERRLOG("recv length less than capsule header\n");
@@ -1144,23 +999,11 @@ nvmf_recv(struct spdk_nvmf_conn *conn, struct ibv_wc *wc)
 	}
 	SPDK_TRACELOG(SPDK_TRACE_NVMF, "recv byte count 0x%x\n", wc->byte_len);
 
-	/* get a response buffer */
-	if (STAILQ_EMPTY(&conn->rdma.qp_tx_desc)) {
-		SPDK_ERRLOG("tx desc pool empty!\n");
-		return -1;
-	}
-	tx_desc = STAILQ_FIRST(&conn->rdma.qp_tx_desc);
-	nvmf_active_tx_desc(tx_desc);
-
-	req = &tx_desc->req;
-	req->conn = conn;
-	req->tx_desc = tx_desc;
-	req->rx_desc = rx_desc;
-	req->cmd = &rx_desc->cmd;
+	req = &rdma_req->req;
 
 	ret = spdk_nvmf_request_prep_data(req,
-					  rx_desc->bb, wc->byte_len - sizeof(struct spdk_nvmf_capsule_cmd),
-					  rx_desc->bb, rx_desc->bb_sgl.length);
+					  rdma_req->bb, wc->byte_len - sizeof(struct spdk_nvmf_capsule_cmd),
+					  rdma_req->bb, rdma_req->bb_sgl.length);
 	if (ret < 0) {
 		SPDK_ERRLOG("prep_data failed\n");
 		return spdk_nvmf_request_complete(req);
@@ -1188,7 +1031,7 @@ int
 nvmf_check_rdma_completions(struct spdk_nvmf_conn *conn)
 {
 	struct ibv_wc wc;
-	struct nvme_qp_tx_desc *tx_desc;
+	struct spdk_nvmf_rdma_request *rdma_req;
 	struct spdk_nvmf_request *req;
 	int rc;
 	int cq_count = 0;
@@ -1217,8 +1060,6 @@ nvmf_check_rdma_completions(struct spdk_nvmf_conn *conn)
 		switch (wc.opcode) {
 		case IBV_WC_SEND:
 			SPDK_TRACELOG(SPDK_TRACE_RDMA, "\nCQ send completion\n");
-			tx_desc = (struct nvme_qp_tx_desc *)wc.wr_id;
-			nvmf_deactive_tx_desc(tx_desc);
 			break;
 
 		case IBV_WC_RDMA_WRITE:
@@ -1227,15 +1068,15 @@ nvmf_check_rdma_completions(struct spdk_nvmf_conn *conn)
 			 * flag in rdma_write, to trace rdma write latency
 			 */
 			SPDK_TRACELOG(SPDK_TRACE_RDMA, "\nCQ rdma write completion\n");
-			tx_desc = (struct nvme_qp_tx_desc *)wc.wr_id;
-			req = &tx_desc->req;
+			rdma_req = (struct spdk_nvmf_rdma_request *)wc.wr_id;
+			req = &rdma_req->req;
 			spdk_trace_record(TRACE_RDMA_WRITE_COMPLETE, 0, 0, (uint64_t)req, 0);
 			break;
 
 		case IBV_WC_RDMA_READ:
 			SPDK_TRACELOG(SPDK_TRACE_RDMA, "\nCQ rdma read completion\n");
-			tx_desc = (struct nvme_qp_tx_desc *)wc.wr_id;
-			req = &tx_desc->req;
+			rdma_req = (struct spdk_nvmf_rdma_request *)wc.wr_id;
+			req = &rdma_req->req;
 			spdk_trace_record(TRACE_RDMA_READ_COMPLETE, 0, 0, (uint64_t)req, 0);
 			rc = spdk_nvmf_request_exec(req);
 			if (rc) {
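Throughout the diff, the rdma_req pointer is stored in the 64-bit wr_id when a work request is posted and cast back when its completion is polled, so no lookup table is needed to match completions to requests. A small sketch of that round trip; the ibv_wc here is filled in by hand rather than by ibv_poll_cq(), so it runs without RDMA hardware:

/* wr_id round trip: request pointer in on post, request pointer out on
 * completion. */
#include <assert.h>
#include <stdint.h>
#include <infiniband/verbs.h>

struct rdma_request {
	int state;
};

int main(void)
{
	struct rdma_request rdma_req = { .state = 1 };
	struct ibv_wc wc = { 0 };

	/* Posting side would do: wr.wr_id = (uintptr_t)&rdma_req;
	 * modeled here by filling the completion directly. */
	wc.wr_id = (uintptr_t)&rdma_req;
	wc.opcode = IBV_WC_RDMA_READ;

	/* Completion side: recover the request from the completion. */
	struct rdma_request *req = (struct rdma_request *)(uintptr_t)wc.wr_id;
	assert(req == &rdma_req);
	return 0;
}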
@@ -54,34 +54,27 @@ struct spdk_nvmf_rdma_conn {
 	uint8_t	responder_resources;
 	uint8_t	initiator_depth;
 	uint8_t	pending_rdma_read_count;
-	STAILQ_HEAD(qp_pending_desc, nvme_qp_tx_desc)		qp_pending_desc;
-
-	STAILQ_HEAD(qp_rx_desc, nvme_qp_rx_desc)		qp_rx_desc;
-	STAILQ_HEAD(qp_tx_desc, nvme_qp_tx_desc)		qp_tx_desc;
-	STAILQ_HEAD(qp_tx_active_desc, nvme_qp_tx_desc)	qp_tx_active_desc;
+	STAILQ_HEAD(, spdk_nvmf_rdma_request)	pending_rdma_reqs;
+	STAILQ_HEAD(, spdk_nvmf_rdma_request)	rdma_reqs;
 };
 
-/* Define the Admin Queue Rx/Tx Descriptors */
-struct nvme_qp_rx_desc {
-	union nvmf_h2c_msg		cmd;
-	struct spdk_nvmf_conn		*conn;
-	struct ibv_mr			*cmd_mr;
-	struct ibv_sge			recv_sgl;
-	struct ibv_sge			bb_sgl; /* must follow recv_sgl */
-	struct ibv_mr			*bb_mr;
-	uint8_t				*bb;
-	uint32_t			bb_len;
-	STAILQ_ENTRY(nvme_qp_rx_desc)	link;
-};
-
-struct nvme_qp_tx_desc {
-	union nvmf_c2h_msg		rsp;
-	struct spdk_nvmf_conn		*conn;
-	struct spdk_nvmf_request	req;
-	struct ibv_mr			*rsp_mr;
-	struct ibv_sge			send_sgl;
-	STAILQ_ENTRY(nvme_qp_tx_desc)	link;
-};
+struct spdk_nvmf_rdma_request {
+	struct spdk_nvmf_request		req;
+	STAILQ_ENTRY(spdk_nvmf_rdma_request)	link;
+
+	union nvmf_h2c_msg			cmd;
+	struct ibv_mr				*cmd_mr;
+
+	union nvmf_c2h_msg			rsp;
+	struct ibv_mr				*rsp_mr;
+
+	struct ibv_sge				send_sgl;
+	struct ibv_sge				recv_sgl;
+	struct ibv_sge				bb_sgl; /* must follow recv_sgl */
+
+	struct ibv_mr				*bb_mr;
+	uint8_t					*bb;
+	uint32_t				bb_len;
+};
 
 int nvmf_post_rdma_read(struct spdk_nvmf_conn *conn,
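The "must follow recv_sgl" comment encodes a hard layout requirement: posting a receive with num_sge = 2 treats recv_sgl and bb_sgl as one contiguous two-element ibv_sge array. That assumption can be enforced at compile time. A standalone sketch using C11 _Static_assert with stand-in types (the SPDK headers provide SPDK_STATIC_ASSERT for the same job):

/* Compile-time check that bb_sgl immediately follows recv_sgl, so the
 * pair can be handed to the HCA as a two-element SGE array. */
#include <stddef.h>
#include <stdint.h>

struct sge {
	uint64_t addr;
	uint32_t length;
	uint32_t lkey;
};

struct rdma_request_layout {
	struct sge send_sgl;
	struct sge recv_sgl;
	struct sge bb_sgl;   /* must follow recv_sgl */
};

_Static_assert(offsetof(struct rdma_request_layout, bb_sgl) ==
	       offsetof(struct rdma_request_layout, recv_sgl) + sizeof(struct sge),
	       "bb_sgl must immediately follow recv_sgl");

int main(void)
{
	return 0;
}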
@@ -59,8 +59,6 @@ SPDK_STATIC_ASSERT(sizeof(union nvmf_c2h_msg) == 16, "Incorrect size");
 
 struct spdk_nvmf_request {
 	struct spdk_nvmf_conn		*conn;
-	struct nvme_qp_tx_desc		*tx_desc;
-	struct nvme_qp_rx_desc		*rx_desc;
 	uint64_t			remote_addr;
 	uint32_t			rkey;
 	uint32_t			length;