net/mlx4: share memory region resources

Memory regions assigned to hardware and used during Tx/Rx are mapped to
mbuf pools. Each Rx queue creates its own MR based on the mempool
provided during queue setup, while each Tx queue looks up and registers
MRs for all existing mbuf pools instead.

Since most applications use few large mbuf pools (usually only a single
one per NUMA node) common to all Tx/Rx queues, the above approach wastes
hardware resources due to redundant MRs. This negatively affects
performance, particularly with large numbers of queues.

This patch therefore makes the entire MR registration common to all
queues using a reference count. A spinlock is added to protect against
asynchronous registration that may occur from the Tx side where new
mempools are discovered based on mbuf data.

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
This commit is contained in:
Adrien Mazarguil 2017-11-02 19:14:22 +01:00 committed by Ferruh Yigit
parent b3d197b435
commit 0d03353077
5 changed files with 97 additions and 22 deletions

View File

@ -51,6 +51,7 @@
#include <rte_ether.h>
#include <rte_interrupts.h>
#include <rte_mempool.h>
#include <rte_spinlock.h>
/** Maximum number of simultaneous MAC addresses. This value is arbitrary. */
#define MLX4_MAX_MAC_ADDRESSES 128
@ -100,6 +101,18 @@ struct rxq;
struct txq;
struct rte_flow;
/**
 * Memory region descriptor.
 *
 * One such object is shared by all Tx/Rx queues whose mbufs come from the
 * same mempool. Lookup, reference count updates and list membership are
 * all serialized by priv->mr_lock (see mlx4_mr_get()/mlx4_mr_put()).
 */
struct mlx4_mr {
	LIST_ENTRY(mlx4_mr) next; /**< Next entry in priv->mr list. */
	uintptr_t start; /**< Base address for memory region. */
	uintptr_t end; /**< End address for memory region. */
	uint32_t lkey; /**< L_Key extracted from @p mr (cached for data path). */
	uint32_t refcnt; /**< Reference count; protected by priv->mr_lock. */
	struct priv *priv; /**< Back pointer to private data (owns mr_lock). */
	struct ibv_mr *mr; /**< Verbs memory region registered for @p mp. */
	struct rte_mempool *mp; /**< Target memory pool (mempool). */
};
/** Private data structure. */
struct priv {
struct rte_eth_dev *dev; /**< Ethernet device. */
@ -119,6 +132,8 @@ struct priv {
struct mlx4_drop *drop; /**< Shared resources for drop flow rules. */
LIST_HEAD(, mlx4_rss) rss; /**< Shared targets for Rx flow rules. */
LIST_HEAD(, rte_flow) flows; /**< Configured flow rule handles. */
LIST_HEAD(, mlx4_mr) mr; /**< Registered memory regions. */
rte_spinlock_t mr_lock; /**< Lock for @p mr access. */
struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
/**< Configured MAC addresses. Unused entries are zeroed. */
};
@ -159,7 +174,8 @@ int mlx4_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx);
/* mlx4_mr.c */
struct ibv_mr *mlx4_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp);
struct mlx4_mr *mlx4_mr_get(struct priv *priv, struct rte_mempool *mp);
void mlx4_mr_put(struct mlx4_mr *mr);
uint32_t mlx4_txq_add_mr(struct txq *txq, struct rte_mempool *mp,
uint32_t i);

View File

@ -55,8 +55,10 @@
#include <rte_branch_prediction.h>
#include <rte_common.h>
#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_memory.h>
#include <rte_mempool.h>
#include <rte_spinlock.h>
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"
@ -135,24 +137,27 @@ mlx4_check_mempool(struct rte_mempool *mp, uintptr_t *start, uintptr_t *end)
}
/**
* Register mempool as a memory region.
* Obtain a memory region from a memory pool.
*
* @param pd
* Pointer to protection domain.
* If a matching memory region already exists, it is returned with its
* reference count incremented, otherwise a new one is registered.
*
* @param priv
* Pointer to private structure.
* @param mp
* Pointer to memory pool.
*
* @return
* Memory region pointer, NULL in case of error and rte_errno is set.
*/
struct ibv_mr *
mlx4_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
struct mlx4_mr *
mlx4_mr_get(struct priv *priv, struct rte_mempool *mp)
{
const struct rte_memseg *ms = rte_eal_get_physmem_layout();
uintptr_t start;
uintptr_t end;
unsigned int i;
struct ibv_mr *mr;
struct mlx4_mr *mr;
if (mlx4_check_mempool(mp, &start, &end) != 0) {
rte_errno = EINVAL;
@ -177,15 +182,70 @@ mlx4_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
(void *)mp, (void *)start, (void *)end,
(size_t)(end - start));
mr = ibv_reg_mr(pd,
(void *)start,
end - start,
IBV_ACCESS_LOCAL_WRITE);
if (!mr)
rte_spinlock_lock(&priv->mr_lock);
LIST_FOREACH(mr, &priv->mr, next)
if (mp == mr->mp && start >= mr->start && end <= mr->end)
break;
if (mr) {
++mr->refcnt;
goto release;
}
mr = rte_malloc(__func__, sizeof(*mr), 0);
if (!mr) {
rte_errno = ENOMEM;
goto release;
}
*mr = (struct mlx4_mr){
.start = start,
.end = end,
.refcnt = 1,
.priv = priv,
.mr = ibv_reg_mr(priv->pd, (void *)start, end - start,
IBV_ACCESS_LOCAL_WRITE),
.mp = mp,
};
if (mr->mr) {
mr->lkey = mr->mr->lkey;
LIST_INSERT_HEAD(&priv->mr, mr, next);
} else {
rte_free(mr);
mr = NULL;
rte_errno = errno ? errno : EINVAL;
}
release:
rte_spinlock_unlock(&priv->mr_lock);
return mr;
}
/**
 * Release a memory region.
 *
 * The reference count is decremented under priv->mr_lock; once it drops
 * to zero the region is unlinked from priv->mr, deregistered from Verbs
 * and its descriptor freed.
 *
 * Note to avoid race conditions given this function may be used from the
 * data plane, it's extremely important that each user holds its own
 * reference.
 *
 * @param mr
 *   Memory region to release.
 */
void
mlx4_mr_put(struct mlx4_mr *mr)
{
	/* Save the back pointer first: @p mr may be freed below and the
	 * lock must still be released afterwards. */
	struct priv *priv = mr->priv;

	rte_spinlock_lock(&priv->mr_lock);
	assert(mr->refcnt);
	if (!(--mr->refcnt)) {
		/* Last user gone; tear the region down entirely. */
		LIST_REMOVE(mr, next);
		claim_zero(ibv_dereg_mr(mr->mr));
		rte_free(mr);
	}
	rte_spinlock_unlock(&priv->mr_lock);
}
/**
* Add memory region (MR) <-> memory pool (MP) association to txq->mp2mr[].
* If mp2mr[] is full, remove an entry first.
@ -203,14 +263,14 @@ mlx4_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
uint32_t
mlx4_txq_add_mr(struct txq *txq, struct rte_mempool *mp, uint32_t i)
{
struct ibv_mr *mr;
struct mlx4_mr *mr;
/* Add a new entry, register MR first. */
DEBUG("%p: discovered new memory pool \"%s\" (%p)",
(void *)txq, mp->name, (void *)mp);
mr = mlx4_mp2mr(txq->priv->pd, mp);
mr = mlx4_mr_get(txq->priv, mp);
if (unlikely(mr == NULL)) {
DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
DEBUG("%p: unable to configure MR, mlx4_mr_get() failed",
(void *)txq);
return (uint32_t)-1;
}
@ -219,7 +279,7 @@ mlx4_txq_add_mr(struct txq *txq, struct rte_mempool *mp, uint32_t i)
DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
(void *)txq);
--i;
claim_zero(ibv_dereg_mr(txq->mp2mr[0].mr));
mlx4_mr_put(txq->mp2mr[0].mr);
memmove(&txq->mp2mr[0], &txq->mp2mr[1],
(sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0])));
}

View File

@ -804,9 +804,8 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
goto error;
}
/* Use the entire Rx mempool as the memory region. */
rxq->mr = mlx4_mp2mr(priv->pd, mp);
rxq->mr = mlx4_mr_get(priv, mp);
if (!rxq->mr) {
rte_errno = EINVAL;
ERROR("%p: MR creation failure: %s",
(void *)dev, strerror(rte_errno));
goto error;
@ -869,6 +868,6 @@ mlx4_rx_queue_release(void *dpdk_rxq)
if (rxq->channel)
claim_zero(ibv_destroy_comp_channel(rxq->channel));
if (rxq->mr)
claim_zero(ibv_dereg_mr(rxq->mr));
mlx4_mr_put(rxq->mr);
rte_free(rxq);
}

View File

@ -67,7 +67,7 @@ struct mlx4_rxq_stats {
struct rxq {
struct priv *priv; /**< Back pointer to private data. */
struct rte_mempool *mp; /**< Memory pool for allocations. */
struct ibv_mr *mr; /**< Memory region (for mp). */
struct mlx4_mr *mr; /**< Memory region. */
struct ibv_cq *cq; /**< Completion queue. */
struct ibv_wq *wq; /**< Work queue. */
struct ibv_comp_channel *channel; /**< Rx completion channel. */
@ -134,7 +134,7 @@ struct txq {
/**< Memory used for storing the first DWORD of data TXBBs. */
struct {
const struct rte_mempool *mp; /**< Cached memory pool. */
struct ibv_mr *mr; /**< Memory region (for mp). */
struct mlx4_mr *mr; /**< Memory region (for mp). */
uint32_t lkey; /**< mr->lkey copy. */
} mp2mr[MLX4_PMD_TX_MP_CACHE]; /**< MP to MR translation table. */
struct priv *priv; /**< Back pointer to private data. */

View File

@ -408,7 +408,7 @@ mlx4_tx_queue_release(void *dpdk_txq)
if (!txq->mp2mr[i].mp)
break;
assert(txq->mp2mr[i].mr);
claim_zero(ibv_dereg_mr(txq->mp2mr[i].mr));
mlx4_mr_put(txq->mp2mr[i].mr);
}
rte_free(txq);
}