iokernel/dpdk: manually register memory for mlx4 driver

This commit is contained in:
Josh Fried 2019-01-13 18:46:56 -05:00
parent eb252973bd
commit 0fe1ddde39
5 changed files with 99 additions and 192 deletions

View File

@ -72,9 +72,7 @@ DPDK_LIBS += -lrte_mempool_stack
DPDK_LIBS += -lrte_ring
# additional libs for running with Mellanox NICs
ifneq ($(MLX),)
DPDK_LIBS += -Wl,-whole-archive -lrte_pmd_mlx4 -Wl,-no-whole-archive
DPDK_LIBS += -Wl,-whole-archive -libverbs -Wl,-no-whole-archive
DPDK_LIBS += -Wl,-whole-archive -lmlx4 -Wl,-no-whole-archive
DPDK_LIBS += -lrte_pmd_mlx4 -libverbs -lmlx4
endif
# must be first

View File

@ -96,6 +96,10 @@ struct proc {
/* Unique identifier -- never recycled across runtimes */
uintptr_t uniqid;
#ifdef MLX
uint32_t lkey;
void *mr;
#endif
/* Overflow queue for completion data */
size_t max_overflows;

View File

@ -12,6 +12,10 @@
#include "defs.h"
#ifdef MLX
#include <mlx4_custom.h>
#endif
#define MAC_TO_PROC_ENTRIES 128
static struct lrpc_chan_out lrpc_data_to_control;
@ -31,6 +35,12 @@ static void dp_clients_add_client(struct proc *p)
if (ret < 0)
log_err("dp_clients: failed to add MAC to hash table in add_client");
#ifdef MLX
p->mr = mlx4_manual_reg_mr(dp.port, p->region.base, p->region.len, &p->lkey);
if (!p->mr)
log_err("dp clients: failed to register memory with MLX nic");
#endif
cores_init_proc(p);
}
@ -67,6 +77,9 @@ static void dp_clients_remove_client(struct proc *p)
if (ret < 0)
log_err("dp_clients: failed to remove MAC from hash table in remove "
"client");
#ifdef MLX
mlx4_manual_dereg_mr(p->mr);
#endif
/* TODO: free queued packets/commands? */

View File

@ -23,9 +23,7 @@ static struct rte_mempool *tx_mbuf_pool;
*/
struct tx_pktmbuf_priv {
#ifdef MLX
uintptr_t uniqid;
uintptr_t start_addr;
uintptr_t end_addr;
uint32_t lkey;
#endif /* MLX */
struct proc *p;
struct thread *th;
@ -84,9 +82,7 @@ static void tx_prepare_tx_mbuf(struct rte_mbuf *buf,
#ifdef MLX
/* initialize private data the Mellanox driver reads to identify the registered memory region */
priv_data->uniqid = p->uniqid;
priv_data->start_addr = (uintptr_t) p->region.base;
priv_data->end_addr = (uintptr_t) p->region.base + p->region.len;
priv_data->lkey = p->lkey;
#endif /* MLX */
/* reference count @p so it doesn't get freed before the completion */

View File

@ -1,162 +1,72 @@
From 458bb8f0deab3c7fec6ed16706a7f2ad6a75e87c Mon Sep 17 00:00:00 2001
From: Amy Ousterhout <aousterh@mit.edu>
Date: Wed, 12 Dec 2018 17:44:10 -0500
Subject: [PATCH] mlx4 patch for DPDK 18.11
From 454f1d6f852804214a5a1d56a53d0fe3ba786b08 Mon Sep 17 00:00:00 2001
From: Josh Fried <joshuafried@gmail.com>
Date: Sun, 13 Jan 2019 18:42:41 -0500
Subject: [PATCH] support manually registering memory for MLX4
---
drivers/net/mlx4/mlx4_mr.c | 110 +++++++++++++++++++++++++++++++++++
drivers/net/mlx4/mlx4_mr.h | 9 +++
drivers/net/mlx4/mlx4_rxtx.c | 2 +-
drivers/net/mlx4/mlx4_rxtx.h | 46 +++++++++++++++
4 files changed, 166 insertions(+), 1 deletion(-)
drivers/net/mlx4/Makefile | 2 ++
drivers/net/mlx4/mlx4_custom.h | 8 ++++++++
drivers/net/mlx4/mlx4_mr.c | 17 +++++++++++++++++
drivers/net/mlx4/mlx4_rxtx.c | 2 +-
drivers/net/mlx4/mlx4_rxtx.h | 12 ++++++++++++
drivers/net/mlx4/rte_pmd_mlx4_version.map | 4 ++++
6 files changed, 44 insertions(+), 1 deletion(-)
create mode 100644 drivers/net/mlx4/mlx4_custom.h
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index 92e932250..9308ce7ad 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -10,6 +10,8 @@ LIB_GLUE = $(LIB_GLUE_BASE).$(LIB_GLUE_VERSION)
LIB_GLUE_BASE = librte_pmd_mlx4_glue.so
LIB_GLUE_VERSION = 18.02.0
+SYMLINK-$(CONFIG_RTE_LIBRTE_MLX4_PMD)-include += mlx4_custom.h
+
# Sources.
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_ethdev.c
diff --git a/drivers/net/mlx4/mlx4_custom.h b/drivers/net/mlx4/mlx4_custom.h
new file mode 100644
index 000000000..93520da22
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_custom.h
@@ -0,0 +1,8 @@
+
+#ifndef RTE_PMD_MLX4_CUSTOM_H
+#define RTE_PMD_MLX4_CUSTOM_H
+
+void *mlx4_manual_reg_mr(uint8_t port_id, void *addr, size_t length, uint32_t *lkey_out);
+void mlx4_manual_dereg_mr(void *ibv_mr);
+
+#endif /* RTE_PMD_MLX4_CUSTOM_H */
diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c
index a0094483a..d75d659f8 100644
index a0094483a..7314de77c 100644
--- a/drivers/net/mlx4/mlx4_mr.c
+++ b/drivers/net/mlx4/mlx4_mr.c
@@ -1358,3 +1358,113 @@ mlx4_mr_release(struct rte_eth_dev *dev)
@@ -1358,3 +1358,20 @@ mlx4_mr_release(struct rte_eth_dev *dev)
/* Free all remaining MRs. */
mlx4_mr_garbage_collect(dev);
}
+
+/**
+ * Create a new memory region for a custom memory pool.
+ *
+ * @param dev
+ * Pointer to Ethernet device
+ * @param addr
+ * Start address of memory region
+ * @param len
+ * Length of memory region
+ * @param socket_id
+ * Socket to create the memory region description on
+ */
+struct mlx4_mr *
+mlx4_mr_create_custom(struct rte_eth_dev *dev, uintptr_t addr,
+ uint32_t len, int socket_id)
+void *
+mlx4_manual_reg_mr(uint8_t port_id, void *addr, size_t length, uint32_t *lkey_out)
+{
+ struct mlx4_mr *mr = NULL;
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ struct priv *priv = dev->data->dev_private;
+ struct ibv_mr *ibv_mr = mlx4_glue->reg_mr(priv->pd, addr, length, IBV_ACCESS_LOCAL_WRITE);
+ if (ibv_mr && lkey_out) *lkey_out = rte_cpu_to_be_32(ibv_mr->lkey);
+
+ mr = rte_zmalloc_socket(NULL,
+ RTE_ALIGN_CEIL(sizeof(*mr),
+ RTE_CACHE_LINE_SIZE),
+ RTE_CACHE_LINE_SIZE, socket_id);
+ if (mr == NULL) {
+ WARN("port %u unable to allocate memory for a new MR",
+ dev->data->port_id);
+ return NULL;
+ }
+ DEBUG("port %u register MR for custom mempool", dev->data->port_id);
+ mr->ibv_mr = mlx4_glue->reg_mr(priv->pd, (void *)addr, len,
+ IBV_ACCESS_LOCAL_WRITE);
+ if (mr->ibv_mr == NULL) {
+ WARN("port %u fail to create a verbs MR for address (%p)",
+ dev->data->port_id, (void *)addr);
+ rte_free(mr);
+ return NULL;
+ }
+ mr->msl = NULL; /* Mark it is external memory. */
+ mr->ms_bmp = NULL;
+ mr->ms_n = 1;
+ mr->ms_bmp_n = 1;
+ DEBUG("port %u MR CREATED (%p) for external memory %p:\n"
+ " [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
+ " lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u",
+ dev->data->port_id, (void *)mr, (void *)addr,
+ addr, addr + len, rte_cpu_to_be_32(mr->ibv_mr->lkey),
+ mr->ms_base_idx, mr->ms_n, mr->ms_bmp_n);
+
+ return mr;
+ return ibv_mr;
+}
+
+/**
+ * Add memory region (MR) <-> memory id association to mr_ctrl->id2mr[].
+ * If id2mr[] is full, remove an entry first.
+ *
+ * @param txq
+ * Pointer to Tx queue structure.
+ * @param[in] m
+ * Info about region for which a memory region lkey must be added.
+ * @param[in] i
+ * Index in memory pool (MP) where to add memory region (MR).
+ * @param mp
+ * Mempool for which we're adding the mem region
+ *
+ * @return
+ * Added mr->lkey on success, (uint32_t)-1 on failure.
+ */
+uint32_t
+mlx4_txq_add_mr_from_mem_info(struct txq *txq, struct mem_info *m, uint32_t i,
+ struct rte_mempool *mp)
+void
+mlx4_manual_dereg_mr(void *ibv_mr)
+{
+ struct mlx4_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
+ struct mlx4_mr *mr;
+
+ if (i != RTE_DIM(mr_ctrl->id2mr) && mr_ctrl->id2mr[i].id != 0) {
+ /* Need to replace an existing entry. */
+ DEBUG("%p: replacing possibly stale MR", (void *)mr_ctrl);
+ mr_free(mr_ctrl->id2mr[i].mr);
+ }
+
+ /* Add a new entry, register MR first. */
+ DEBUG("%p: discovered new mem info", (void *)mr_ctrl);
+ mr = mlx4_mr_create_custom(txq->priv->dev, m->start_addr,
+ m->end_addr - m->start_addr, mp->socket_id);
+ if (unlikely(mr == NULL)) {
+ DEBUG("%p: unable to configure MR, mlx4_mr_get() failed",
+ (void *)txq);
+ return (uint32_t)-1;
+ }
+ if (unlikely(i == RTE_DIM(mr_ctrl->id2mr))) {
+ /* Table is full, remove oldest entry. */
+ DEBUG("%p: MR <-> ID table full, dropping oldest entry.",
+ (void *)mr_ctrl);
+ --i;
+ mr_free(mr_ctrl->id2mr[0].mr);
+ memmove(&mr_ctrl->id2mr[0], &mr_ctrl->id2mr[1],
+ (sizeof(mr_ctrl->id2mr) - sizeof(mr_ctrl->id2mr[0])));
+ }
+ /* Store the new entry. */
+ mr_ctrl->id2mr[i].id = m->unique_id;
+ mr_ctrl->id2mr[i].mr = mr;
+ mr_ctrl->id2mr[i].start = m->start_addr;
+ mr_ctrl->id2mr[i].end = m->end_addr;
+ mr_ctrl->id2mr[i].lkey = rte_cpu_to_be_32(mr->ibv_mr->lkey);
+ DEBUG("%p: new MR lkey for mem_info %p %p: 0x%08" PRIu32,
+ (void *)mr_ctrl, (void *) m->start_addr, (void *) m->end_addr,
+ mr_ctrl->id2mr[i].lkey);
+ return mr_ctrl->id2mr[i].lkey;
+ mlx4_glue->dereg_mr(ibv_mr);
+}
diff --git a/drivers/net/mlx4/mlx4_mr.h b/drivers/net/mlx4/mlx4_mr.h
index 37a365a8b..d1b7ba0d9 100644
--- a/drivers/net/mlx4/mlx4_mr.h
+++ b/drivers/net/mlx4/mlx4_mr.h
@@ -64,6 +64,13 @@ struct mlx4_mr_ctrl {
uint16_t head; /* Index of the oldest entry in top-half cache. */
struct mlx4_mr_cache cache[MLX4_MR_CACHE_N]; /* Cache for top-half. */
struct mlx4_mr_btree cache_bh; /* Cache for bottom-half. */
+ struct {
+ uintptr_t id; /**< id of mem region (proc->uniqid). */
+ struct mlx4_mr *mr; /**< Memory region. */
+ uintptr_t start;
+ uintptr_t end;
+ uint32_t lkey; /**< mr->lkey copy. */
+ } __rte_packed id2mr[MLX4_MR_CACHE_N]; /**< id to MR translation table. */
} __rte_packed;
extern struct mlx4_dev_list mlx4_mem_event_cb_list;
@@ -81,6 +88,8 @@ int mlx4_mr_update_mp(struct rte_eth_dev *dev, struct mlx4_mr_ctrl *mr_ctrl,
struct rte_mempool *mp);
void mlx4_mr_dump_dev(struct rte_eth_dev *dev);
void mlx4_mr_release(struct rte_eth_dev *dev);
+struct mlx4_mr *mlx4_mr_create_custom(struct rte_eth_dev *dev, uintptr_t addr,
+ uint32_t len, int socket_id);
/**
* Look up LKey from given lookup table by linear search. Firstly look up the
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index 8c88effcd..9f98e6533 100644
index 8c88effcd..73917699d 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -949,7 +949,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
@ -164,72 +74,58 @@ index 8c88effcd..9f98e6533 100644
break;
}
- lkey = mlx4_tx_mb2mr(txq, buf);
+ lkey = mlx4_tx_mb2mr_custom(txq, buf);
+ lkey = mlx4_tx_mb2mr_custom(buf);
if (unlikely(lkey == (uint32_t)-1)) {
/* MR does not exist. */
DEBUG("%p: unable to get MP <-> MR association",
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index d7ec4e0c5..2dbed57c7 100644
index d7ec4e0c5..9a6605aeb 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -165,6 +165,13 @@ uint32_t mlx4_rx_addr2mr_bh(struct rxq *rxq, uintptr_t addr);
@@ -26,6 +26,7 @@
#include "mlx4.h"
#include "mlx4_prm.h"
#include "mlx4_mr.h"
+#include "mlx4_custom.h"
/** Rx queue counters. */
struct mlx4_rxq_stats {
@@ -165,6 +166,9 @@ uint32_t mlx4_rx_addr2mr_bh(struct rxq *rxq, uintptr_t addr);
uint32_t mlx4_tx_mb2mr_bh(struct txq *txq, struct rte_mbuf *mb);
uint32_t mlx4_tx_update_ext_mp(struct txq *txq, uintptr_t addr,
struct rte_mempool *mp);
+struct mem_info {
+ uintptr_t unique_id;
+ uintptr_t start_addr;
+ uintptr_t end_addr;
+ uint32_t lkey;
+};
+uint32_t mlx4_txq_add_mr_from_mem_info(struct txq *txq, struct mem_info *m,
+ uint32_t i, struct rte_mempool *mp);
/**
* Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which the
@@ -243,4 +250,43 @@ mlx4_tx_mb2mr(struct txq *txq, struct rte_mbuf *mb)
@@ -243,4 +247,12 @@ mlx4_tx_mb2mr(struct txq *txq, struct rte_mbuf *mb)
return mlx4_tx_mb2mr_bh(txq, mb);
}
+#define __max(x,y) ((x) > (y) ? (x) : (y))
+#define __min(x,y) ((x) < (y) ? (x) : (y))
+/**
+ * Query LKey from a packet buffer for Tx. If not found, register the memory.
+ *
+ * @param txq
+ * Pointer to Tx queue structure.
+ * @param mb
+ * Buffer for which a memory region lkey must be returned.
+ *
+ * @return
+ * Searched LKey on success, UINT32_MAX on no match.
+ */
+static __rte_always_inline uint32_t
+mlx4_tx_mb2mr_custom(struct txq *txq, struct rte_mbuf *mb)
+mlx4_tx_mb2mr_custom(struct rte_mbuf *mb)
+{
+ struct mlx4_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
+ unsigned int i;
+ struct mem_info *m = (struct mem_info *)(((char *) mb) +
+ sizeof(struct rte_mbuf));
+
+ for (i = 0; (i != RTE_DIM(mr_ctrl->id2mr)); ++i) {
+ if (unlikely(mr_ctrl->id2mr[i].id == 0)) {
+ /* Unknown region, add a new MR for it. */
+ break;
+ }
+ if (mr_ctrl->id2mr[i].id == m->unique_id) {
+ /* Found region. */
+ return mr_ctrl->id2mr[i].lkey;
+ }
+ if (__max(mr_ctrl->id2mr[i].start, m->start_addr) <
+ __min(mr_ctrl->id2mr[i].end, m->end_addr)) {
+ /* Recreate lkey/mr for previously mapped regions */
+ return mlx4_txq_add_mr_from_mem_info(txq, m, i, mlx4_mb2mp(mb));
+ }
+ }
+ return mlx4_txq_add_mr_from_mem_info(txq, m, i, mlx4_mb2mp(mb));
+ return m->lkey;
+}
+
#endif /* MLX4_RXTX_H_ */
diff --git a/drivers/net/mlx4/rte_pmd_mlx4_version.map b/drivers/net/mlx4/rte_pmd_mlx4_version.map
index ef3539840..b932c2621 100644
--- a/drivers/net/mlx4/rte_pmd_mlx4_version.map
+++ b/drivers/net/mlx4/rte_pmd_mlx4_version.map
@@ -1,4 +1,8 @@
DPDK_2.0 {
local: *;
+
+ global:
+ mlx4_manual_reg_mr;
+ mlx4_manual_dereg_mr;
};
--
2.17.1