common/mlx5: remove redundant parameter in MR search

Memory region management has recently been shared between drivers,
including the search for caches in the data plane.
The initial search in the local linear cache of the queue usually
yields a result, in which case the search does not continue to the
next-level caches.

The function that searches the local cache takes a pointer to the
device as a parameter. The pointer is not needed for the local search
itself, only for the subsequent searches (which, as mentioned, usually
do not happen).
Passing the device pointer to the function and keeping it live takes
some time and has some impact on performance.

Add the pointer to the device as a field of the mr_ctrl structure. The
field is set in the control path and is read only when the search
actually needs it.

Fixes: fc59a1ec55 ("common/mlx5: share MR mempool registration")

Signed-off-by: Michael Baum <michaelba@nvidia.com>
Reviewed-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Reviewed-by: Dmitry Kozlyuk <dkozlyuk@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
This commit is contained in:
Michael Baum 2021-11-03 12:17:06 +02:00 committed by Thomas Monjalon
parent 6a4e438576
commit 334ed198ab
10 changed files with 43 additions and 78 deletions

View File

@ -292,8 +292,8 @@ mlx5_mr_btree_dump(struct mlx5_mr_btree *bt __rte_unused)
*
* @param mr_ctrl
* Pointer to MR control structure.
* @param dev_gen_ptr
* Pointer to generation number of global cache.
* @param cdev
* Pointer to the mlx5 device structure.
* @param socket
* NUMA socket on which memory must be allocated.
*
@ -301,15 +301,16 @@ mlx5_mr_btree_dump(struct mlx5_mr_btree *bt __rte_unused)
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_mr_ctrl_init(struct mlx5_mr_ctrl *mr_ctrl, uint32_t *dev_gen_ptr,
mlx5_mr_ctrl_init(struct mlx5_mr_ctrl *mr_ctrl, struct mlx5_common_device *cdev,
int socket)
{
if (mr_ctrl == NULL) {
rte_errno = EINVAL;
return -rte_errno;
}
mr_ctrl->cdev = cdev;
/* Save pointer of global generation number to check memory event. */
mr_ctrl->dev_gen_ptr = dev_gen_ptr;
mr_ctrl->dev_gen_ptr = &cdev->mr_scache.dev_gen;
/* Initialize B-tree and allocate memory for bottom-half cache table. */
return mlx5_mr_btree_init(&mr_ctrl->cache_bh, MLX5_MR_BTREE_CACHE_N,
socket);
@ -1860,11 +1861,12 @@ mlx5_mr_mempool2mr_bh(struct mlx5_mr_share_cache *share_cache,
}
uint32_t
mlx5_mr_mb2mr_bh(struct mlx5_common_device *cdev, struct mlx5_mp_id *mp_id,
struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mb)
mlx5_mr_mb2mr_bh(struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mb,
struct mlx5_mp_id *mp_id)
{
uint32_t lkey;
uintptr_t addr = (uintptr_t)mb->buf_addr;
struct mlx5_common_device *cdev = mr_ctrl->cdev;
if (cdev->config.mr_mempool_reg_en) {
struct rte_mempool *mp = NULL;

View File

@ -66,6 +66,7 @@ struct mlx5_common_device;
/* Per-queue MR control descriptor. */
struct mlx5_mr_ctrl {
struct mlx5_common_device *cdev; /* Pointer to the mlx5 common device.*/
uint32_t *dev_gen_ptr; /* Generation number of device to poll. */
uint32_t cur_gen; /* Generation number saved to flush caches. */
uint16_t mru; /* Index of last hit entry in top-half cache. */
@ -169,41 +170,36 @@ void mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl);
* Bottom-half of the LKey search. If supported, look up the address in
* the mempool. Otherwise, search the caches of the old mechanism.
*
* @param cdev
* Pointer to mlx5 device.
* @param mp_id
* Multi-process identifier, may be NULL for the primary process.
* @param mr_ctrl
* Pointer to per-queue MR control structure.
* @param mb
* Pointer to mbuf.
* @param mp_id
* Multi-process identifier, may be NULL for the primary process.
*
* @return
* Searched LKey on success, UINT32_MAX on no match.
*/
__rte_internal
uint32_t mlx5_mr_mb2mr_bh(struct mlx5_common_device *cdev,
struct mlx5_mp_id *mp_id,
struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mb);
uint32_t mlx5_mr_mb2mr_bh(struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mbuf,
struct mlx5_mp_id *mp_id);
/**
* Query LKey from a packet buffer.
*
* @param cdev
* Pointer to the mlx5 device structure.
* @param mp_id
* Multi-process identifier, may be NULL for the primary process.
* @param mr_ctrl
* Pointer to per-queue MR control structure.
* @param mbuf
* Pointer to mbuf.
* @param mp_id
* Multi-process identifier, may be NULL for the primary process.
*
* @return
* Searched LKey on success, UINT32_MAX on no match.
*/
static __rte_always_inline uint32_t
mlx5_mr_mb2mr(struct mlx5_common_device *cdev, struct mlx5_mp_id *mp_id,
struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mbuf)
mlx5_mr_mb2mr(struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mbuf,
struct mlx5_mp_id *mp_id)
{
uint32_t lkey;
@ -216,14 +212,14 @@ mlx5_mr_mb2mr(struct mlx5_common_device *cdev, struct mlx5_mp_id *mp_id,
if (likely(lkey != UINT32_MAX))
return lkey;
/* Take slower bottom-half on miss. */
return mlx5_mr_mb2mr_bh(cdev, mp_id, mr_ctrl, mbuf);
return mlx5_mr_mb2mr_bh(mr_ctrl, mbuf, mp_id);
}
/* mlx5_common_mr.c */
__rte_internal
int mlx5_mr_ctrl_init(struct mlx5_mr_ctrl *mr_ctrl, uint32_t *dev_gen_ptr,
int socket);
int mlx5_mr_ctrl_init(struct mlx5_mr_ctrl *mr_ctrl,
struct mlx5_common_device *cdev, int socket);
__rte_internal
void mlx5_mr_btree_free(struct mlx5_mr_btree *bt);
void mlx5_mr_btree_dump(struct mlx5_mr_btree *bt __rte_unused);

View File

@ -210,7 +210,7 @@ mlx5_compress_qp_setup(struct rte_compressdev *dev, uint16_t qp_id,
return -rte_errno;
}
dev->data->queue_pairs[qp_id] = qp;
if (mlx5_mr_ctrl_init(&qp->mr_ctrl, &priv->cdev->mr_scache.dev_gen,
if (mlx5_mr_ctrl_init(&qp->mr_ctrl, priv->cdev,
priv->dev_config.socket_id)) {
DRV_LOG(ERR, "Cannot allocate MR Btree for qp %u.",
(uint32_t)qp_id);
@ -464,7 +464,7 @@ mlx5_compress_dseg_set(struct mlx5_compress_qp *qp,
uintptr_t addr = rte_pktmbuf_mtod_offset(mbuf, uintptr_t, offset);
dseg->bcount = rte_cpu_to_be_32(len);
dseg->lkey = mlx5_mr_mb2mr(qp->priv->cdev, 0, &qp->mr_ctrl, mbuf);
dseg->lkey = mlx5_mr_mb2mr(&qp->mr_ctrl, mbuf, 0);
dseg->pbuf = rte_cpu_to_be_64(addr);
return dseg->lkey;
}

View File

@ -312,9 +312,9 @@ mlx5_crypto_get_block_size(struct rte_crypto_op *op)
}
static __rte_always_inline uint32_t
mlx5_crypto_klm_set(struct mlx5_crypto_priv *priv, struct mlx5_crypto_qp *qp,
struct rte_mbuf *mbuf, struct mlx5_wqe_dseg *klm,
uint32_t offset, uint32_t *remain)
mlx5_crypto_klm_set(struct mlx5_crypto_qp *qp, struct rte_mbuf *mbuf,
struct mlx5_wqe_dseg *klm, uint32_t offset,
uint32_t *remain)
{
uint32_t data_len = (rte_pktmbuf_data_len(mbuf) - offset);
uintptr_t addr = rte_pktmbuf_mtod_offset(mbuf, uintptr_t, offset);
@ -324,22 +324,21 @@ mlx5_crypto_klm_set(struct mlx5_crypto_priv *priv, struct mlx5_crypto_qp *qp,
*remain -= data_len;
klm->bcount = rte_cpu_to_be_32(data_len);
klm->pbuf = rte_cpu_to_be_64(addr);
klm->lkey = mlx5_mr_mb2mr(priv->cdev, 0, &qp->mr_ctrl, mbuf);
klm->lkey = mlx5_mr_mb2mr(&qp->mr_ctrl, mbuf, 0);
return klm->lkey;
}
static __rte_always_inline uint32_t
mlx5_crypto_klms_set(struct mlx5_crypto_priv *priv, struct mlx5_crypto_qp *qp,
struct rte_crypto_op *op, struct rte_mbuf *mbuf,
struct mlx5_wqe_dseg *klm)
mlx5_crypto_klms_set(struct mlx5_crypto_qp *qp, struct rte_crypto_op *op,
struct rte_mbuf *mbuf, struct mlx5_wqe_dseg *klm)
{
uint32_t remain_len = op->sym->cipher.data.length;
uint32_t nb_segs = mbuf->nb_segs;
uint32_t klm_n = 1u;
/* First mbuf needs to take the cipher offset. */
if (unlikely(mlx5_crypto_klm_set(priv, qp, mbuf, klm,
if (unlikely(mlx5_crypto_klm_set(qp, mbuf, klm,
op->sym->cipher.data.offset, &remain_len) == UINT32_MAX)) {
op->status = RTE_CRYPTO_OP_STATUS_ERROR;
return 0;
@ -351,7 +350,7 @@ mlx5_crypto_klms_set(struct mlx5_crypto_priv *priv, struct mlx5_crypto_qp *qp,
op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS;
return 0;
}
if (unlikely(mlx5_crypto_klm_set(priv, qp, mbuf, ++klm, 0,
if (unlikely(mlx5_crypto_klm_set(qp, mbuf, ++klm, 0,
&remain_len) == UINT32_MAX)) {
op->status = RTE_CRYPTO_OP_STATUS_ERROR;
return 0;
@ -377,7 +376,7 @@ mlx5_crypto_wqe_set(struct mlx5_crypto_priv *priv,
uint32_t ds;
bool ipl = op->sym->m_dst == NULL || op->sym->m_dst == op->sym->m_src;
/* Set UMR WQE. */
uint32_t klm_n = mlx5_crypto_klms_set(priv, qp, op,
uint32_t klm_n = mlx5_crypto_klms_set(qp, op,
ipl ? op->sym->m_src : op->sym->m_dst, klms);
if (unlikely(klm_n == 0))
@ -403,8 +402,7 @@ mlx5_crypto_wqe_set(struct mlx5_crypto_priv *priv,
cseg = RTE_PTR_ADD(cseg, priv->umr_wqe_size);
klms = RTE_PTR_ADD(cseg, sizeof(struct mlx5_rdma_write_wqe));
if (!ipl) {
klm_n = mlx5_crypto_klms_set(priv, qp, op, op->sym->m_src,
klms);
klm_n = mlx5_crypto_klms_set(qp, op, op->sym->m_src, klms);
if (unlikely(klm_n == 0))
return 0;
} else {
@ -650,7 +648,7 @@ mlx5_crypto_queue_pair_setup(struct rte_cryptodev *dev, uint16_t qp_id,
DRV_LOG(ERR, "Failed to create QP.");
goto error;
}
if (mlx5_mr_ctrl_init(&qp->mr_ctrl, &priv->cdev->mr_scache.dev_gen,
if (mlx5_mr_ctrl_init(&qp->mr_ctrl, priv->cdev,
priv->dev_config.socket_id) != 0) {
DRV_LOG(ERR, "Cannot allocate MR Btree for qp %u.",
(uint32_t)qp_id);

View File

@ -311,7 +311,6 @@ static __rte_always_inline uint32_t
mlx5_rx_addr2mr(struct mlx5_rxq_data *rxq, uintptr_t addr)
{
struct mlx5_mr_ctrl *mr_ctrl = &rxq->mr_ctrl;
struct mlx5_rxq_ctrl *rxq_ctrl;
struct rte_mempool *mp;
uint32_t lkey;
@ -320,14 +319,9 @@ mlx5_rx_addr2mr(struct mlx5_rxq_data *rxq, uintptr_t addr)
MLX5_MR_CACHE_N, addr);
if (likely(lkey != UINT32_MAX))
return lkey;
/*
* Slower search in the mempool database on miss.
* During queue creation rxq->sh is not yet set, so we use rxq_ctrl.
*/
rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
mp = mlx5_rxq_mprq_enabled(rxq) ? rxq->mprq_mp : rxq->mp;
return mlx5_mr_mempool2mr_bh(&rxq_ctrl->sh->cdev->mr_scache,
mr_ctrl, mp, addr);
return mlx5_mr_mempool2mr_bh(&mr_ctrl->cdev->mr_scache, mr_ctrl,
mp, addr);
}
#define mlx5_rx_mb2mr(rxq, mb) mlx5_rx_addr2mr(rxq, (uintptr_t)((mb)->buf_addr))

View File

@ -1678,8 +1678,7 @@ mlx5_rxq_new(struct rte_eth_dev *dev, struct mlx5_rxq_priv *rxq,
goto error;
}
tmpl->type = MLX5_RXQ_TYPE_STANDARD;
if (mlx5_mr_ctrl_init(&tmpl->rxq.mr_ctrl,
&priv->sh->cdev->mr_scache.dev_gen, socket)) {
if (mlx5_mr_ctrl_init(&tmpl->rxq.mr_ctrl, priv->sh->cdev, socket)) {
/* rte_errno is already set. */
goto error;
}

View File

@ -368,10 +368,9 @@ mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
struct mlx5_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
struct mlx5_txq_ctrl *txq_ctrl =
container_of(txq, struct mlx5_txq_ctrl, txq);
struct mlx5_priv *priv = txq_ctrl->priv;
/* Take slower bottom-half on miss. */
return mlx5_mr_mb2mr(priv->sh->cdev, &priv->mp_id, mr_ctrl, mb);
return mlx5_mr_mb2mr(mr_ctrl, mb, &txq_ctrl->priv->mp_id);
}
/**

View File

@ -1134,8 +1134,7 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
rte_errno = ENOMEM;
return NULL;
}
if (mlx5_mr_ctrl_init(&tmpl->txq.mr_ctrl,
&priv->sh->cdev->mr_scache.dev_gen, socket)) {
if (mlx5_mr_ctrl_init(&tmpl->txq.mr_ctrl, priv->sh->cdev, socket)) {
/* rte_errno is already set. */
goto error;
}

View File

@ -247,8 +247,7 @@ mlx5_regex_qp_setup(struct rte_regexdev *dev, uint16_t qp_ind,
nb_sq_config++;
}
ret = mlx5_mr_ctrl_init(&qp->mr_ctrl, &priv->cdev->mr_scache.dev_gen,
rte_socket_id());
ret = mlx5_mr_ctrl_init(&qp->mr_ctrl, priv->cdev, rte_socket_id());
if (ret) {
DRV_LOG(ERR, "Error setting up mr btree");
goto err_btree;

View File

@ -109,26 +109,6 @@ set_wqe_ctrl_seg(struct mlx5_wqe_ctrl_seg *seg, uint16_t pi, uint8_t opcode,
seg->imm = imm;
}
/**
* Query LKey from a packet buffer for QP. If not found, add the mempool.
*
* @param priv
* Pointer to the priv object.
* @param mr_ctrl
* Pointer to per-queue MR control structure.
* @param mbuf
* Pointer to source mbuf, to search in.
*
* @return
* Searched LKey on success, UINT32_MAX on no match.
*/
static inline uint32_t
mlx5_regex_mb2mr(struct mlx5_regex_priv *priv, struct mlx5_mr_ctrl *mr_ctrl,
struct rte_mbuf *mbuf)
{
return mlx5_mr_mb2mr(priv->cdev, 0, mr_ctrl, mbuf);
}
static inline void
__prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_hw_qp *qp_obj,
struct rte_regex_ops *op, struct mlx5_regex_job *job,
@ -180,7 +160,7 @@ prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
struct mlx5_klm klm;
klm.byte_count = rte_pktmbuf_data_len(op->mbuf);
klm.mkey = mlx5_regex_mb2mr(priv, &qp->mr_ctrl, op->mbuf);
klm.mkey = mlx5_mr_mb2mr(&qp->mr_ctrl, op->mbuf, 0);
klm.address = rte_pktmbuf_mtod(op->mbuf, uintptr_t);
__prep_one(priv, qp_obj, op, job, qp_obj->pi, &klm);
qp_obj->db_pi = qp_obj->pi;
@ -349,9 +329,8 @@ prep_regex_umr_wqe_set(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
while (mbuf) {
addr = rte_pktmbuf_mtod(mbuf, uintptr_t);
/* Build indirect mkey seg's KLM. */
mkey_klm->mkey = mlx5_regex_mb2mr(priv,
&qp->mr_ctrl,
mbuf);
mkey_klm->mkey = mlx5_mr_mb2mr(&qp->mr_ctrl,
mbuf, 0);
mkey_klm->address = rte_cpu_to_be_64(addr);
mkey_klm->byte_count = rte_cpu_to_be_32
(rte_pktmbuf_data_len(mbuf));
@ -368,7 +347,7 @@ prep_regex_umr_wqe_set(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
klm.byte_count = scatter_size;
} else {
/* The single mbuf case. Build the KLM directly. */
klm.mkey = mlx5_regex_mb2mr(priv, &qp->mr_ctrl, mbuf);
klm.mkey = mlx5_mr_mb2mr(&qp->mr_ctrl, mbuf, 0);
klm.address = rte_pktmbuf_mtod(mbuf, uintptr_t);
klm.byte_count = rte_pktmbuf_data_len(mbuf);
}