common/mlx5: fix post doorbell barrier

The rdma-core library can map the doorbell register in two ways,
depending on the environment variable "MLX5_SHUT_UP_BF":

  - as regular cached memory, when the variable is either missing or
    set to zero. This type of mapping may cause significant doorbell
    register write latency and requires an explicit memory write
    barrier to mitigate this issue and prevent write combining.

  - as non-cached memory, when the variable is present and set to a
    non-zero value. This type of mapping may cause a performance impact
    under heavy load, but the explicit write memory barrier is not
    required, which may improve core performance.

The UAR creation function maps a doorbell in one of the above ways
according to the system. At run time, the drivers always add an
explicit memory barrier after writing to the doorbell register.
In cases where the doorbell was mapped as non-cached memory, the
explicit memory barrier is unnecessary and may impair performance.

The commit [1] solved this problem for the Tx queue. At run time, it
checks the mapping type and issues the memory barrier after writing to
the Tx doorbell register only if it is needed. The mapping type is
extracted directly from the uar_mmap_offset field in the queue
properties.

This patch shares this code between the drivers and extends the above
solution to each of them.

[1] commit 8409a28573
    ("net/mlx5: control transmit doorbell register mapping")

Fixes: f8c97babc9 ("compress/mlx5: add data-path functions")
Fixes: 8e196c08ab ("crypto/mlx5: support enqueue/dequeue operations")
Fixes: 4d4e245ad6 ("regex/mlx5: support enqueue")
Cc: stable@dpdk.org

Signed-off-by: Michael Baum <michaelba@nvidia.com>
Reviewed-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
Michael Baum 2021-11-03 20:35:13 +02:00 committed by Thomas Monjalon
parent b6e9c33c82
commit 5dfa003db5
24 changed files with 350 additions and 597 deletions


@ -10,6 +10,7 @@
#include <rte_mempool.h>
#include <rte_class.h>
#include <rte_malloc.h>
#include <rte_eal_paging.h>
#include "mlx5_common.h"
#include "mlx5_common_os.h"
@ -930,10 +931,10 @@ RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
* Allocate the User Access Region with DevX on specified device.
* This routine handles the following UAR allocation issues:
*
* - tries to allocate the UAR with the most appropriate memory mapping
* - Try to allocate the UAR with the most appropriate memory mapping
* type from the ones supported by the host.
*
* - tries to allocate the UAR with non-NULL base address OFED 5.0.x and
* - Try to allocate the UAR with non-NULL base address OFED 5.0.x and
* Upstream rdma_core before v29 returned the NULL as UAR base address
* if UAR was not the first object in the UAR page.
* It caused the PMD failure and we should try to get another UAR till
@ -945,7 +946,7 @@ RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
* @return
* UAR object pointer on success, NULL otherwise and rte_errno is set.
*/
void *
static void *
mlx5_devx_alloc_uar(struct mlx5_common_device *cdev)
{
void *uar;
@ -1015,4 +1016,46 @@ mlx5_devx_alloc_uar(struct mlx5_common_device *cdev)
return uar;
}
void
mlx5_devx_uar_release(struct mlx5_uar *uar)
{
if (uar->obj != NULL)
mlx5_glue->devx_free_uar(uar->obj);
memset(uar, 0, sizeof(*uar));
}
int
mlx5_devx_uar_prepare(struct mlx5_common_device *cdev, struct mlx5_uar *uar)
{
off_t uar_mmap_offset;
const size_t page_size = rte_mem_page_size();
void *base_addr;
void *uar_obj;
if (page_size == (size_t)-1) {
DRV_LOG(ERR, "Failed to get mem page size");
rte_errno = ENOMEM;
return -1;
}
uar_obj = mlx5_devx_alloc_uar(cdev);
if (uar_obj == NULL || mlx5_os_get_devx_uar_reg_addr(uar_obj) == NULL) {
rte_errno = errno;
DRV_LOG(ERR, "Failed to allocate UAR.");
return -1;
}
uar->obj = uar_obj;
uar_mmap_offset = mlx5_os_get_devx_uar_mmap_offset(uar_obj);
base_addr = mlx5_os_get_devx_uar_base_addr(uar_obj);
uar->dbnc = mlx5_db_map_type_get(uar_mmap_offset, page_size);
uar->bf_db.db = mlx5_os_get_devx_uar_reg_addr(uar_obj);
uar->cq_db.db = RTE_PTR_ADD(base_addr, MLX5_CQ_DOORBELL);
#ifndef RTE_ARCH_64
rte_spinlock_init(&uar->bf_sl);
rte_spinlock_init(&uar->cq_sl);
uar->bf_db.sl_p = &uar->bf_sl;
uar->cq_db.sl_p = &uar->cq_sl;
#endif /* RTE_ARCH_64 */
return 0;
}
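/*
 * Usage sketch: a PMD prepares the UAR once at probe time and releases
 * it on remove, as the compress/crypto/regex/vdpa hunks below do:
 *
 *   if (mlx5_devx_uar_prepare(cdev, &priv->uar) != 0)
 *           return -1;
 *   ...
 *   mlx5_devx_uar_release(&priv->uar);
 */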
RTE_PMD_EXPORT_NAME(mlx5_common_driver, __COUNTER__);


@ -280,6 +280,87 @@ struct mlx5_klm {
uint64_t address;
};
/* All UAR arguments using doorbell register in datapath. */
struct mlx5_uar_data {
uint64_t *db;
/* The doorbell's virtual address mapped to the relevant HW UAR space.*/
#ifndef RTE_ARCH_64
rte_spinlock_t *sl_p;
/* Pointer to UAR access lock required for 32bit implementations. */
#endif /* RTE_ARCH_64 */
};
/* DevX UAR control structure. */
struct mlx5_uar {
struct mlx5_uar_data bf_db; /* UAR data for Blueflame register. */
struct mlx5_uar_data cq_db; /* UAR data for CQ arm db register. */
void *obj; /* DevX UAR object. */
bool dbnc; /* Doorbell mapped to non-cached region. */
#ifndef RTE_ARCH_64
rte_spinlock_t bf_sl;
rte_spinlock_t cq_sl;
/* UAR access locks required for 32bit implementations. */
#endif /* RTE_ARCH_64 */
};
/**
* Ring a doorbell and flush the update if requested.
*
* @param uar
* Pointer to UAR data structure.
* @param val
* value to write in big endian format.
* @param index
* Index of doorbell record.
* @param db_rec
* Address of doorbell record.
* @param flash
* Decide whether to flush the DB writing using a memory barrier.
*/
static __rte_always_inline void
mlx5_doorbell_ring(struct mlx5_uar_data *uar, uint64_t val, uint32_t index,
volatile uint32_t *db_rec, bool flash)
{
rte_io_wmb();
*db_rec = rte_cpu_to_be_32(index);
/* Ensure ordering between DB record actual update and UAR access. */
rte_wmb();
#ifdef RTE_ARCH_64
*uar->db = val;
#else /* !RTE_ARCH_64 */
rte_spinlock_lock(uar->sl_p);
*(volatile uint32_t *)uar->db = val;
rte_io_wmb();
*((volatile uint32_t *)uar->db + 1) = val >> 32;
rte_spinlock_unlock(uar->sl_p);
#endif
if (flash)
rte_wmb();
}
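/*
 * Usage sketch: after posting WQEs, a PMD rings its send queue doorbell
 * through this helper, e.g. (taken from the compress driver hunk below):
 *
 *   mlx5_doorbell_ring(&qp->priv->uar.bf_db, *(volatile uint64_t *)wqe,
 *                      qp->pi, &qp->qp.db_rec[MLX5_SND_DBR],
 *                      !qp->priv->uar.dbnc);
 *
 * The last argument requests the trailing rte_wmb() only when the
 * doorbell is not mapped as non-cached memory.
 */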
/**
* Get the doorbell register mapping type.
*
* @param uar_mmap_offset
* Mmap offset of Verbs/DevX UAR.
* @param page_size
* System page size
*
* @return
* 1 for non-cached, 0 otherwise.
*/
static inline uint16_t
mlx5_db_map_type_get(off_t uar_mmap_offset, size_t page_size)
{
off_t cmd = uar_mmap_offset / page_size;
cmd >>= MLX5_UAR_MMAP_CMD_SHIFT;
cmd &= MLX5_UAR_MMAP_CMD_MASK;
if (cmd == MLX5_MMAP_GET_NC_PAGES_CMD)
return 1;
return 0;
}
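/*
 * Worked example (hypothetical offsets, 4 KiB pages): the mmap command
 * is carried in bits 8..15 of the page index (uar_mmap_offset / 4096).
 *
 *   mlx5_db_map_type_get(0x300000, 4096): page index 0x300, cmd 3
 *       -> returns 1 (non-cached doorbell mapping)
 *   mlx5_db_map_type_get(0x001000, 4096): page index 0x001, cmd 0
 *       -> returns 0 (regular/write-combining mapping)
 */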
__rte_internal
void mlx5_translate_port_name(const char *port_name_in,
struct mlx5_switch_info *port_info_out);
@ -416,7 +497,12 @@ mlx5_dev_mempool_unregister(struct mlx5_common_device *cdev,
struct rte_mempool *mp);
__rte_internal
void *mlx5_devx_alloc_uar(struct mlx5_common_device *cdev);
int
mlx5_devx_uar_prepare(struct mlx5_common_device *cdev, struct mlx5_uar *uar);
__rte_internal
void
mlx5_devx_uar_release(struct mlx5_uar *uar);
/* mlx5_common_os.c */


@ -39,6 +39,14 @@
#define MLX5_TXDB_NCACHED 1
#define MLX5_TXDB_HEURISTIC 2
/* Fields of memory mapping type in offset parameter of mmap() */
#define MLX5_UAR_MMAP_CMD_SHIFT 8
#define MLX5_UAR_MMAP_CMD_MASK 0xff
#ifndef HAVE_MLX5DV_MMAP_GET_NC_PAGES_CMD
#define MLX5_MMAP_GET_NC_PAGES_CMD 3
#endif
#define MLX5_VDPA_MAX_RETRIES 20
#define MLX5_VDPA_USEC 1000


@ -16,8 +16,6 @@ INTERNAL {
mlx5_dev_mempool_unregister;
mlx5_dev_mempool_subscribe;
mlx5_devx_alloc_uar;
mlx5_devx_cmd_alloc_pd;
mlx5_devx_cmd_create_conn_track_offload_obj;
mlx5_devx_cmd_create_cq;
@ -76,6 +74,9 @@ INTERNAL {
mlx5_devx_sq_create;
mlx5_devx_sq_destroy;
mlx5_devx_uar_prepare;
mlx5_devx_uar_release;
mlx5_free;
mlx5_get_ifname_sysfs; # WINDOWS_NO_EXPORT


@ -41,14 +41,13 @@ struct mlx5_compress_priv {
TAILQ_ENTRY(mlx5_compress_priv) next;
struct rte_compressdev *compressdev;
struct mlx5_common_device *cdev; /* Backend mlx5 device. */
void *uar;
struct mlx5_uar uar;
uint8_t min_block_size;
/* Minimum huffman block size supported by the device. */
struct rte_compressdev_config dev_config;
LIST_HEAD(xform_list, mlx5_compress_xform) xform_list;
rte_spinlock_t xform_sl;
volatile uint64_t *uar_addr;
/* HCA caps*/
/* HCA caps */
uint32_t mmo_decomp_sq:1;
uint32_t mmo_decomp_qp:1;
uint32_t mmo_comp_sq:1;
@ -56,9 +55,6 @@ struct mlx5_compress_priv {
uint32_t mmo_dma_sq:1;
uint32_t mmo_dma_qp:1;
uint32_t log_block_sz;
#ifndef RTE_ARCH_64
rte_spinlock_t uar32_sl;
#endif /* RTE_ARCH_64 */
};
struct mlx5_compress_qp {
@ -188,11 +184,11 @@ mlx5_compress_qp_setup(struct rte_compressdev *dev, uint16_t qp_id,
struct mlx5_compress_priv *priv = dev->data->dev_private;
struct mlx5_compress_qp *qp;
struct mlx5_devx_cq_attr cq_attr = {
.uar_page_id = mlx5_os_get_devx_uar_page_id(priv->uar),
.uar_page_id = mlx5_os_get_devx_uar_page_id(priv->uar.obj),
};
struct mlx5_devx_qp_attr qp_attr = {
.pd = priv->cdev->pdn,
.uar_index = mlx5_os_get_devx_uar_page_id(priv->uar),
.uar_index = mlx5_os_get_devx_uar_page_id(priv->uar.obj),
.user_index = qp_id,
};
uint32_t log_ops_n = rte_log2_u32(max_inflight_ops);
@ -469,24 +465,6 @@ mlx5_compress_dseg_set(struct mlx5_compress_qp *qp,
return dseg->lkey;
}
/*
* Provide safe 64bit store operation to mlx5 UAR region for both 32bit and
* 64bit architectures.
*/
static __rte_always_inline void
mlx5_compress_uar_write(uint64_t val, struct mlx5_compress_priv *priv)
{
#ifdef RTE_ARCH_64
*priv->uar_addr = val;
#else /* !RTE_ARCH_64 */
rte_spinlock_lock(&priv->uar32_sl);
*(volatile uint32_t *)priv->uar_addr = val;
rte_io_wmb();
*((volatile uint32_t *)priv->uar_addr + 1) = val >> 32;
rte_spinlock_unlock(&priv->uar32_sl);
#endif
}
static uint16_t
mlx5_compress_enqueue_burst(void *queue_pair, struct rte_comp_op **ops,
uint16_t nb_ops)
@ -547,11 +525,9 @@ mlx5_compress_enqueue_burst(void *queue_pair, struct rte_comp_op **ops,
qp->pi++;
} while (--remain);
qp->stats.enqueued_count += nb_ops;
rte_io_wmb();
qp->qp.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(qp->pi);
rte_wmb();
mlx5_compress_uar_write(*(volatile uint64_t *)wqe, qp->priv);
rte_wmb();
mlx5_doorbell_ring(&qp->priv->uar.bf_db, *(volatile uint64_t *)wqe,
qp->pi, &qp->qp.db_rec[MLX5_SND_DBR],
!qp->priv->uar.dbnc);
return nb_ops;
}
@ -671,33 +647,6 @@ mlx5_compress_dequeue_burst(void *queue_pair, struct rte_comp_op **ops,
return i;
}
static void
mlx5_compress_uar_release(struct mlx5_compress_priv *priv)
{
if (priv->uar != NULL) {
mlx5_glue->devx_free_uar(priv->uar);
priv->uar = NULL;
}
}
static int
mlx5_compress_uar_prepare(struct mlx5_compress_priv *priv)
{
priv->uar = mlx5_devx_alloc_uar(priv->cdev);
if (priv->uar == NULL || mlx5_os_get_devx_uar_reg_addr(priv->uar) ==
NULL) {
rte_errno = errno;
DRV_LOG(ERR, "Failed to allocate UAR.");
return -1;
}
priv->uar_addr = mlx5_os_get_devx_uar_reg_addr(priv->uar);
MLX5_ASSERT(priv->uar_addr);
#ifndef RTE_ARCH_64
rte_spinlock_init(&priv->uar32_sl);
#endif /* RTE_ARCH_64 */
return 0;
}
static int
mlx5_compress_args_check_handler(const char *key, const char *val, void *opaque)
{
@ -801,7 +750,7 @@ mlx5_compress_dev_probe(struct mlx5_common_device *cdev)
priv->cdev = cdev;
priv->compressdev = compressdev;
priv->min_block_size = attr->compress_min_block_size;
if (mlx5_compress_uar_prepare(priv) != 0) {
if (mlx5_devx_uar_prepare(cdev, &priv->uar) != 0) {
rte_compressdev_pmd_destroy(priv->compressdev);
return -1;
}
@ -824,7 +773,7 @@ mlx5_compress_dev_remove(struct mlx5_common_device *cdev)
TAILQ_REMOVE(&mlx5_compress_priv_list, priv, next);
pthread_mutex_unlock(&priv_list_lock);
if (priv) {
mlx5_compress_uar_release(priv);
mlx5_devx_uar_release(&priv->uar);
rte_compressdev_pmd_destroy(priv->compressdev);
}
return 0;


@ -427,20 +427,6 @@ mlx5_crypto_wqe_set(struct mlx5_crypto_priv *priv,
return 1;
}
static __rte_always_inline void
mlx5_crypto_uar_write(uint64_t val, struct mlx5_crypto_priv *priv)
{
#ifdef RTE_ARCH_64
*priv->uar_addr = val;
#else /* !RTE_ARCH_64 */
rte_spinlock_lock(&priv->uar32_sl);
*(volatile uint32_t *)priv->uar_addr = val;
rte_io_wmb();
*((volatile uint32_t *)priv->uar_addr + 1) = val >> 32;
rte_spinlock_unlock(&priv->uar32_sl);
#endif
}
static uint16_t
mlx5_crypto_enqueue_burst(void *queue_pair, struct rte_crypto_op **ops,
uint16_t nb_ops)
@ -476,11 +462,9 @@ mlx5_crypto_enqueue_burst(void *queue_pair, struct rte_crypto_op **ops,
qp->pi++;
} while (--remain);
qp->stats.enqueued_count += nb_ops;
rte_io_wmb();
qp->qp_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(qp->db_pi);
rte_wmb();
mlx5_crypto_uar_write(*(volatile uint64_t *)qp->wqe, qp->priv);
rte_wmb();
mlx5_doorbell_ring(&priv->uar.bf_db, *(volatile uint64_t *)qp->wqe,
qp->db_pi, &qp->qp_obj.db_rec[MLX5_SND_DBR],
!priv->uar.dbnc);
return nb_ops;
}
@ -614,7 +598,7 @@ mlx5_crypto_queue_pair_setup(struct rte_cryptodev *dev, uint16_t qp_id,
uint32_t ret;
uint32_t alloc_size = sizeof(*qp);
struct mlx5_devx_cq_attr cq_attr = {
.uar_page_id = mlx5_os_get_devx_uar_page_id(priv->uar),
.uar_page_id = mlx5_os_get_devx_uar_page_id(priv->uar.obj),
};
if (dev->data->queue_pairs[qp_id] != NULL)
@ -636,7 +620,7 @@ mlx5_crypto_queue_pair_setup(struct rte_cryptodev *dev, uint16_t qp_id,
goto error;
}
attr.pd = priv->cdev->pdn;
attr.uar_index = mlx5_os_get_devx_uar_page_id(priv->uar);
attr.uar_index = mlx5_os_get_devx_uar_page_id(priv->uar.obj);
attr.cqn = qp->cq_obj.cq->id;
attr.rq_size = 0;
attr.sq_size = RTE_BIT32(log_nb_desc);
@ -724,30 +708,6 @@ static struct rte_cryptodev_ops mlx5_crypto_ops = {
.sym_configure_raw_dp_ctx = NULL,
};
static void
mlx5_crypto_uar_release(struct mlx5_crypto_priv *priv)
{
if (priv->uar != NULL) {
mlx5_glue->devx_free_uar(priv->uar);
priv->uar = NULL;
}
}
static int
mlx5_crypto_uar_prepare(struct mlx5_crypto_priv *priv)
{
priv->uar = mlx5_devx_alloc_uar(priv->cdev);
if (priv->uar)
priv->uar_addr = mlx5_os_get_devx_uar_reg_addr(priv->uar);
if (priv->uar == NULL || priv->uar_addr == NULL) {
rte_errno = errno;
DRV_LOG(ERR, "Failed to allocate UAR.");
return -1;
}
return 0;
}
static int
mlx5_crypto_args_check_handler(const char *key, const char *val, void *opaque)
{
@ -899,7 +859,7 @@ mlx5_crypto_dev_probe(struct mlx5_common_device *cdev)
priv = crypto_dev->data->dev_private;
priv->cdev = cdev;
priv->crypto_dev = crypto_dev;
if (mlx5_crypto_uar_prepare(priv) != 0) {
if (mlx5_devx_uar_prepare(cdev, &priv->uar) != 0) {
rte_cryptodev_pmd_destroy(priv->crypto_dev);
return -1;
}
@ -907,7 +867,7 @@ mlx5_crypto_dev_probe(struct mlx5_common_device *cdev)
&devarg_prms.login_attr);
if (login == NULL) {
DRV_LOG(ERR, "Failed to configure login.");
mlx5_crypto_uar_release(priv);
mlx5_devx_uar_release(&priv->uar);
rte_cryptodev_pmd_destroy(priv->crypto_dev);
return -rte_errno;
}
@ -950,7 +910,7 @@ mlx5_crypto_dev_remove(struct mlx5_common_device *cdev)
pthread_mutex_unlock(&priv_list_lock);
if (priv) {
claim_zero(mlx5_devx_cmd_destroy(priv->login_obj));
mlx5_crypto_uar_release(priv);
mlx5_devx_uar_release(&priv->uar);
rte_cryptodev_pmd_destroy(priv->crypto_dev);
}
return 0;


@ -21,8 +21,7 @@ struct mlx5_crypto_priv {
TAILQ_ENTRY(mlx5_crypto_priv) next;
struct mlx5_common_device *cdev; /* Backend mlx5 device. */
struct rte_cryptodev *crypto_dev;
void *uar; /* User Access Region. */
volatile uint64_t *uar_addr;
struct mlx5_uar uar; /* User Access Region. */
uint32_t max_segs_num; /* Maximum supported data segs. */
struct mlx5_hlist *dek_hlist; /* Dek hash list. */
struct rte_cryptodev_config dev_config;
@ -32,9 +31,6 @@ struct mlx5_crypto_priv {
uint16_t umr_wqe_size;
uint16_t umr_wqe_stride;
uint16_t max_rdmar_ds;
#ifndef RTE_ARCH_64
rte_spinlock_t uar32_sl;
#endif /* RTE_ARCH_64 */
};
struct mlx5_crypto_qp {


@ -16,6 +16,7 @@
#include <rte_malloc.h>
#include <ethdev_driver.h>
#include <rte_common.h>
#include <rte_eal_paging.h>
#include <mlx5_glue.h>
#include <mlx5_common.h>
@ -367,7 +368,10 @@ mlx5_rxq_ibv_obj_new(struct mlx5_rxq_priv *rxq)
rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
rxq_data->cq_db = cq_info.dbrec;
rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
rxq_data->cq_uar = cq_info.cq_uar;
rxq_data->uar_data.db = RTE_PTR_ADD(cq_info.cq_uar, MLX5_CQ_DOORBELL);
#ifndef RTE_ARCH_64
rxq_data->uar_data.sl_p = &priv->sh->uar_lock_cq;
#endif
rxq_data->cqn = cq_info.cqn;
/* Create WQ (RQ) using Verbs API. */
tmpl->wq = mlx5_rxq_ibv_wq_create(rxq);
@ -897,6 +901,42 @@ mlx5_txq_ibv_qp_create(struct rte_eth_dev *dev, uint16_t idx)
return qp_obj;
}
/**
* Initialize Tx UAR registers for primary process.
*
* @param txq_ctrl
* Pointer to Tx queue control structure.
* @param bf_reg
* BlueFlame register from Verbs UAR.
*/
static void
mlx5_txq_ibv_uar_init(struct mlx5_txq_ctrl *txq_ctrl, void *bf_reg)
{
struct mlx5_priv *priv = txq_ctrl->priv;
struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
const size_t page_size = rte_mem_page_size();
struct mlx5_txq_data *txq = &txq_ctrl->txq;
off_t uar_mmap_offset = txq_ctrl->uar_mmap_offset;
#ifndef RTE_ARCH_64
unsigned int lock_idx;
#endif
MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
MLX5_ASSERT(ppriv);
if (page_size == (size_t)-1) {
DRV_LOG(ERR, "Failed to get mem page size");
rte_errno = ENOMEM;
}
txq->db_heu = priv->sh->cdev->config.dbnc == MLX5_TXDB_HEURISTIC;
txq->db_nc = mlx5_db_map_type_get(uar_mmap_offset, page_size);
ppriv->uar_table[txq->idx].db = bf_reg;
#ifndef RTE_ARCH_64
/* Assign an UAR lock according to UAR page number. */
lock_idx = (uar_mmap_offset / page_size) & MLX5_UAR_PAGE_NUM_MASK;
ppriv->uar_table[txq->idx].sl_p = &priv->sh->uar_lock[lock_idx];
#endif
}
/**
* Create the Tx queue Verbs object.
*
@ -1028,7 +1068,7 @@ mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx)
rte_errno = EINVAL;
goto error;
}
txq_uar_init(txq_ctrl, qp.bf.reg);
mlx5_txq_ibv_uar_init(txq_ctrl, qp.bf.reg);
dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
return 0;
error:


@ -19,6 +19,7 @@
#include <rte_rwlock.h>
#include <rte_spinlock.h>
#include <rte_string_fns.h>
#include <rte_eal_paging.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
@ -987,143 +988,35 @@ mlx5_get_supported_tunneling_offloads(const struct mlx5_hca_attr *attr)
return tn_offloads;
}
/*
* Allocate Rx and Tx UARs in robust fashion.
* This routine handles the following UAR allocation issues:
*
* - tries to allocate the UAR with the most appropriate memory
* mapping type from the ones supported by the host
*
* - tries to allocate the UAR with non-NULL base address
* OFED 5.0.x and Upstream rdma_core before v29 returned the NULL as
* UAR base address if UAR was not the first object in the UAR page.
* It caused the PMD failure and we should try to get another UAR
* till we get the first one with non-NULL base address returned.
*/
/* Fill all fields of UAR structure. */
static int
mlx5_alloc_rxtx_uars(struct mlx5_dev_ctx_shared *sh,
const struct mlx5_common_dev_config *config)
mlx5_rxtx_uars_prepare(struct mlx5_dev_ctx_shared *sh)
{
uint32_t uar_mapping, retry;
int err = 0;
void *base_addr;
int ret;
for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
/* Control the mapping type according to the settings. */
uar_mapping = (config->dbnc == MLX5_TXDB_NCACHED) ?
MLX5DV_UAR_ALLOC_TYPE_NC :
MLX5DV_UAR_ALLOC_TYPE_BF;
#else
RTE_SET_USED(config);
/*
* It seems we have no way to control the memory mapping type
* for the UAR, the default "Write-Combining" type is supposed.
* The UAR initialization on queue creation queries the
* actual mapping type done by Verbs/kernel and setups the
* PMD datapath accordingly.
*/
uar_mapping = 0;
#endif
sh->tx_uar = mlx5_glue->devx_alloc_uar(sh->cdev->ctx,
uar_mapping);
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
if (!sh->tx_uar &&
uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
if (config->dbnc == MLX5_TXDB_CACHED ||
config->dbnc == MLX5_TXDB_HEURISTIC)
DRV_LOG(WARNING, "Devarg tx_db_nc setting "
"is not supported by DevX");
/*
* In some environments like virtual machine
* the Write Combining mapped might be not supported
* and UAR allocation fails. We try "Non-Cached"
* mapping for the case. The tx_burst routines take
* the UAR mapping type into account on UAR setup
* on queue creation.
*/
DRV_LOG(DEBUG, "Failed to allocate Tx DevX UAR (BF)");
uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
sh->tx_uar = mlx5_glue->devx_alloc_uar(sh->cdev->ctx,
uar_mapping);
} else if (!sh->tx_uar &&
uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) {
if (config->dbnc == MLX5_TXDB_NCACHED)
DRV_LOG(WARNING, "Devarg tx_db_nc settings "
"is not supported by DevX");
/*
* If Verbs/kernel does not support "Non-Cached"
* try the "Write-Combining".
*/
DRV_LOG(DEBUG, "Failed to allocate Tx DevX UAR (NC)");
uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF;
sh->tx_uar = mlx5_glue->devx_alloc_uar(sh->cdev->ctx,
uar_mapping);
}
#endif
if (!sh->tx_uar) {
DRV_LOG(ERR, "Failed to allocate Tx DevX UAR (BF/NC)");
err = ENOMEM;
goto exit;
}
base_addr = mlx5_os_get_devx_uar_base_addr(sh->tx_uar);
if (base_addr)
break;
/*
* The UARs are allocated by rdma_core within the
* IB device context, on context closure all UARs
* will be freed, should be no memory/object leakage.
*/
DRV_LOG(DEBUG, "Retrying to allocate Tx DevX UAR");
sh->tx_uar = NULL;
ret = mlx5_devx_uar_prepare(sh->cdev, &sh->tx_uar);
if (ret) {
DRV_LOG(ERR, "Failed to prepare Tx DevX UAR.");
return -rte_errno;
}
/* Check whether we finally succeeded with valid UAR allocation. */
if (!sh->tx_uar) {
DRV_LOG(ERR, "Failed to allocate Tx DevX UAR (NULL base)");
err = ENOMEM;
goto exit;
MLX5_ASSERT(sh->tx_uar.obj);
MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->tx_uar.obj));
ret = mlx5_devx_uar_prepare(sh->cdev, &sh->rx_uar);
if (ret) {
DRV_LOG(ERR, "Failed to prepare Rx DevX UAR.");
mlx5_devx_uar_release(&sh->tx_uar);
return -rte_errno;
}
for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
uar_mapping = 0;
sh->devx_rx_uar = mlx5_glue->devx_alloc_uar(sh->cdev->ctx,
uar_mapping);
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
if (!sh->devx_rx_uar &&
uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
/*
* Rx UAR is used to control interrupts only,
* should be no datapath noticeable impact,
* can try "Non-Cached" mapping safely.
*/
DRV_LOG(DEBUG, "Failed to allocate Rx DevX UAR (BF)");
uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
sh->devx_rx_uar = mlx5_glue->devx_alloc_uar
(sh->cdev->ctx, uar_mapping);
}
#endif
if (!sh->devx_rx_uar) {
DRV_LOG(ERR, "Failed to allocate Rx DevX UAR (BF/NC)");
err = ENOMEM;
goto exit;
}
base_addr = mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar);
if (base_addr)
break;
/*
* The UARs are allocated by rdma_core within the
* IB device context, on context closure all UARs
* will be freed, should be no memory/object leakage.
*/
DRV_LOG(DEBUG, "Retrying to allocate Rx DevX UAR");
sh->devx_rx_uar = NULL;
}
/* Check whether we finally succeeded with valid UAR allocation. */
if (!sh->devx_rx_uar) {
DRV_LOG(ERR, "Failed to allocate Rx DevX UAR (NULL base)");
err = ENOMEM;
}
exit:
return err;
MLX5_ASSERT(sh->rx_uar.obj);
MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->rx_uar.obj));
return 0;
}
static void
mlx5_rxtx_uars_release(struct mlx5_dev_ctx_shared *sh)
{
mlx5_devx_uar_release(&sh->rx_uar);
mlx5_devx_uar_release(&sh->tx_uar);
}
/**
@ -1332,21 +1225,17 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
err = ENOMEM;
goto error;
}
err = mlx5_alloc_rxtx_uars(sh, &sh->cdev->config);
err = mlx5_rxtx_uars_prepare(sh);
if (err)
goto error;
MLX5_ASSERT(sh->tx_uar);
MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->tx_uar));
MLX5_ASSERT(sh->devx_rx_uar);
MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar));
}
#ifndef RTE_ARCH_64
/* Initialize UAR access locks for 32bit implementations. */
rte_spinlock_init(&sh->uar_lock_cq);
for (i = 0; i < MLX5_UAR_PAGE_NUM_MAX; i++)
rte_spinlock_init(&sh->uar_lock[i]);
} else {
/* Initialize UAR access locks for 32bit implementations. */
rte_spinlock_init(&sh->uar_lock_cq);
for (i = 0; i < MLX5_UAR_PAGE_NUM_MAX; i++)
rte_spinlock_init(&sh->uar_lock[i]);
#endif
}
mlx5_os_dev_shared_handler_install(sh);
if (LIST_EMPTY(&mlx5_dev_ctx_list)) {
err = mlx5_flow_os_init_workspace_once();
@ -1373,10 +1262,7 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
if (sh->tis[i])
claim_zero(mlx5_devx_cmd_destroy(sh->tis[i]));
} while (++i < (uint32_t)sh->bond.n_port);
if (sh->devx_rx_uar)
mlx5_glue->devx_free_uar(sh->devx_rx_uar);
if (sh->tx_uar)
mlx5_glue->devx_free_uar(sh->tx_uar);
mlx5_rxtx_uars_release(sh);
mlx5_free(sh);
MLX5_ASSERT(err > 0);
rte_errno = err;
@ -1449,18 +1335,13 @@ mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh)
mlx5_aso_flow_mtrs_mng_close(sh);
mlx5_flow_ipool_destroy(sh);
mlx5_os_dev_shared_handler_uninstall(sh);
if (sh->tx_uar) {
mlx5_glue->devx_free_uar(sh->tx_uar);
sh->tx_uar = NULL;
}
mlx5_rxtx_uars_release(sh);
do {
if (sh->tis[i])
claim_zero(mlx5_devx_cmd_destroy(sh->tis[i]));
} while (++i < sh->bond.n_port);
if (sh->td)
claim_zero(mlx5_devx_cmd_destroy(sh->td));
if (sh->devx_rx_uar)
mlx5_glue->devx_free_uar(sh->devx_rx_uar);
MLX5_ASSERT(sh->geneve_tlv_option_resource == NULL);
pthread_mutex_destroy(&sh->txpp.mutex);
mlx5_free(sh);
@ -1610,8 +1491,8 @@ mlx5_proc_priv_init(struct rte_eth_dev *dev)
* UAR register table follows the process private structure. BlueFlame
* registers for Tx queues are stored in the table.
*/
ppriv_size =
sizeof(struct mlx5_proc_priv) + priv->txqs_n * sizeof(void *);
ppriv_size = sizeof(struct mlx5_proc_priv) +
priv->txqs_n * sizeof(struct mlx5_uar_data);
ppriv = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, ppriv_size,
RTE_CACHE_LINE_SIZE, dev->device->numa_node);
if (!ppriv) {


@ -530,7 +530,6 @@ struct mlx5_aso_sq {
rte_spinlock_t sqsl;
struct mlx5_aso_cq cq;
struct mlx5_devx_sq sq_obj;
volatile uint64_t *uar_addr;
struct mlx5_pmd_mr mr;
uint16_t pi;
uint32_t head;
@ -1176,7 +1175,7 @@ struct mlx5_dev_ctx_shared {
void *rx_domain; /* RX Direct Rules name space handle. */
void *tx_domain; /* TX Direct Rules name space handle. */
#ifndef RTE_ARCH_64
rte_spinlock_t uar_lock_cq; /* CQs share a common distinct UAR */
rte_spinlock_t uar_lock_cq; /* CQs share a common distinct UAR. */
rte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX];
/* UAR same-page access control required in 32bit implementations. */
#endif
@ -1205,11 +1204,11 @@ struct mlx5_dev_ctx_shared {
struct mlx5_devx_obj *tis[16]; /* TIS object. */
struct mlx5_devx_obj *td; /* Transport domain. */
struct mlx5_lag lag; /* LAG attributes */
void *tx_uar; /* Tx/packet pacing shared UAR. */
struct mlx5_uar tx_uar; /* DevX UAR for Tx and Txpp and ASO SQs. */
struct mlx5_uar rx_uar; /* DevX UAR for Rx. */
struct mlx5_proc_priv *pppriv; /* Pointer to primary private process. */
struct mlx5_ecpri_parser_profile ecpri_parser;
/* Flex parser profiles information. */
void *devx_rx_uar; /* DevX UAR for Rx. */
LIST_HEAD(shared_rxqs, mlx5_rxq_ctrl) shared_rxqs; /* Shared RXQs. */
struct mlx5_aso_age_mng *aso_age_mng;
/* Management data for aging mechanism using ASO Flow Hit. */
@ -1234,7 +1233,7 @@ struct mlx5_dev_ctx_shared {
struct mlx5_proc_priv {
size_t uar_table_sz;
/* Size of UAR register table. */
void *uar_table[];
struct mlx5_uar_data uar_table[];
/* Table of UAR registers for each process. */
};
@ -1791,6 +1790,7 @@ int mlx5_flow_meter_flush(struct rte_eth_dev *dev,
void mlx5_flow_meter_rxq_flush(struct rte_eth_dev *dev);
/* mlx5_os.c */
struct rte_pci_driver;
int mlx5_os_get_dev_attr(struct mlx5_common_device *dev,
struct mlx5_dev_attr *dev_attr);


@ -112,14 +112,6 @@
#define MLX5_UAR_PAGE_NUM_MAX 64
#define MLX5_UAR_PAGE_NUM_MASK ((MLX5_UAR_PAGE_NUM_MAX) - 1)
/* Fields of memory mapping type in offset parameter of mmap() */
#define MLX5_UAR_MMAP_CMD_SHIFT 8
#define MLX5_UAR_MMAP_CMD_MASK 0xff
#ifndef HAVE_MLX5DV_MMAP_GET_NC_PAGES_CMD
#define MLX5_MMAP_GET_NC_PAGES_CMD 3
#endif
/* Log 2 of the default number of strides per WQE for Multi-Packet RQ. */
#define MLX5_MPRQ_STRIDE_NUM_N 6U


@ -363,7 +363,7 @@ mlx5_rxq_create_devx_cq_resources(struct mlx5_rxq_priv *rxq)
"Port %u Rx CQE compression is disabled for LRO.",
port_id);
}
cq_attr.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->devx_rx_uar);
cq_attr.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->rx_uar.obj);
log_cqe_n = log2above(cqe_n);
/* Create CQ using DevX API. */
ret = mlx5_devx_cq_create(sh->cdev->ctx, &rxq_ctrl->obj->cq_obj,
@ -374,7 +374,7 @@ mlx5_rxq_create_devx_cq_resources(struct mlx5_rxq_priv *rxq)
rxq_data->cqes = (volatile struct mlx5_cqe (*)[])
(uintptr_t)cq_obj->cqes;
rxq_data->cq_db = cq_obj->db_rec;
rxq_data->cq_uar = mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar);
rxq_data->uar_data = sh->rx_uar.cq_db;
rxq_data->cqe_n = log_cqe_n;
rxq_data->cqn = cq_obj->cq->id;
rxq_data->cq_ci = 0;
@ -1185,6 +1185,7 @@ mlx5_txq_create_devx_sq_resources(struct rte_eth_dev *dev, uint16_t idx,
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_common_device *cdev = priv->sh->cdev;
struct mlx5_uar *uar = &priv->sh->tx_uar;
struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
struct mlx5_txq_ctrl *txq_ctrl =
container_of(txq_data, struct mlx5_txq_ctrl, txq);
@ -1198,8 +1199,7 @@ mlx5_txq_create_devx_sq_resources(struct rte_eth_dev *dev, uint16_t idx,
.tis_lst_sz = 1,
.wq_attr = (struct mlx5_devx_wq_attr){
.pd = cdev->pdn,
.uar_page =
mlx5_os_get_devx_uar_page_id(priv->sh->tx_uar),
.uar_page = mlx5_os_get_devx_uar_page_id(uar->obj),
},
.ts_format =
mlx5_ts_format_conv(cdev->config.hca_attr.sq_ts_format),
@ -1239,10 +1239,11 @@ mlx5_txq_devx_obj_new(struct rte_eth_dev *dev, uint16_t idx)
rte_errno = ENOMEM;
return -rte_errno;
#else
struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
struct mlx5_dev_ctx_shared *sh = priv->sh;
struct mlx5_txq_obj *txq_obj = txq_ctrl->obj;
struct mlx5_devx_cq_attr cq_attr = {
.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->tx_uar),
.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->tx_uar.obj),
};
uint32_t cqe_n, log_desc_n;
uint32_t wqe_n, wqe_size;
@ -1250,6 +1251,8 @@ mlx5_txq_devx_obj_new(struct rte_eth_dev *dev, uint16_t idx)
MLX5_ASSERT(txq_data);
MLX5_ASSERT(txq_obj);
MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
MLX5_ASSERT(ppriv);
txq_obj->txq_ctrl = txq_ctrl;
txq_obj->dev = dev;
cqe_n = (1UL << txq_data->elts_n) / MLX5_TX_COMP_THRESH +
@ -1322,6 +1325,8 @@ mlx5_txq_devx_obj_new(struct rte_eth_dev *dev, uint16_t idx)
txq_data->qp_db = &txq_obj->sq_obj.db_rec[MLX5_SND_DBR];
*txq_data->qp_db = 0;
txq_data->qp_num_8s = txq_obj->sq_obj.sq->id << 8;
txq_data->db_heu = sh->cdev->config.dbnc == MLX5_TXDB_HEURISTIC;
txq_data->db_nc = sh->tx_uar.dbnc;
/* Change Send Queue state to Ready-to-Send. */
ret = mlx5_txq_devx_modify(txq_obj, MLX5_TXQ_MOD_RST2RDY, 0);
if (ret) {
@ -1340,10 +1345,9 @@ mlx5_txq_devx_obj_new(struct rte_eth_dev *dev, uint16_t idx)
if (!priv->sh->tdn)
priv->sh->tdn = priv->sh->td->id;
#endif
MLX5_ASSERT(sh->tx_uar && mlx5_os_get_devx_uar_reg_addr(sh->tx_uar));
txq_ctrl->uar_mmap_offset =
mlx5_os_get_devx_uar_mmap_offset(sh->tx_uar);
txq_uar_init(txq_ctrl, mlx5_os_get_devx_uar_reg_addr(sh->tx_uar));
mlx5_os_get_devx_uar_mmap_offset(sh->tx_uar.obj);
ppriv->uar_table[txq_data->idx] = sh->tx_uar.bf_db;
dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
return 0;
error:


@ -285,7 +285,6 @@ mlx5_aso_sq_create(void *ctx, struct mlx5_aso_sq *sq, int socket, void *uar,
sq->head = 0;
sq->tail = 0;
sq->sqn = sq->sq_obj.sq->id;
sq->uar_addr = mlx5_os_get_devx_uar_reg_addr(uar);
rte_spinlock_init(&sq->sqsl);
return 0;
error:
@ -317,7 +316,7 @@ mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
sq_desc_n, &sh->aso_age_mng->aso_sq.mr, 0))
return -1;
if (mlx5_aso_sq_create(cdev->ctx, &sh->aso_age_mng->aso_sq, 0,
sh->tx_uar, cdev->pdn,
sh->tx_uar.obj, cdev->pdn,
MLX5_ASO_QUEUE_LOG_DESC,
cdev->config.hca_attr.sq_ts_format)) {
mlx5_aso_dereg_mr(cdev, &sh->aso_age_mng->aso_sq.mr);
@ -327,7 +326,7 @@ mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
break;
case ASO_OPC_MOD_POLICER:
if (mlx5_aso_sq_create(cdev->ctx, &sh->mtrmng->pools_mng.sq, 0,
sh->tx_uar, cdev->pdn,
sh->tx_uar.obj, cdev->pdn,
MLX5_ASO_QUEUE_LOG_DESC,
cdev->config.hca_attr.sq_ts_format))
return -1;
@ -339,7 +338,7 @@ mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
&sh->ct_mng->aso_sq.mr, 0))
return -1;
if (mlx5_aso_sq_create(cdev->ctx, &sh->ct_mng->aso_sq, 0,
sh->tx_uar, cdev->pdn,
sh->tx_uar.obj, cdev->pdn,
MLX5_ASO_QUEUE_LOG_DESC,
cdev->config.hca_attr.sq_ts_format)) {
mlx5_aso_dereg_mr(cdev, &sh->ct_mng->aso_sq.mr);
@ -390,8 +389,8 @@ mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
/**
* Write a burst of WQEs to ASO SQ.
*
* @param[in] mng
* ASO management data, contains the SQ.
* @param[in] sh
* Pointer to shared device context.
* @param[in] n
* Index of the last valid pool.
*
@ -399,8 +398,9 @@ mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
* Number of WQEs in burst.
*/
static uint16_t
mlx5_aso_sq_enqueue_burst(struct mlx5_aso_age_mng *mng, uint16_t n)
mlx5_aso_sq_enqueue_burst(struct mlx5_dev_ctx_shared *sh, uint16_t n)
{
struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
volatile struct mlx5_aso_wqe *wqe;
struct mlx5_aso_sq *sq = &mng->aso_sq;
struct mlx5_aso_age_pool *pool;
@ -439,11 +439,9 @@ mlx5_aso_sq_enqueue_burst(struct mlx5_aso_age_mng *mng, uint16_t n)
} while (max);
wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
MLX5_COMP_MODE_OFFSET);
rte_io_wmb();
sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
rte_wmb();
*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH.*/
rte_wmb();
mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
!sh->tx_uar.dbnc);
return sq->elts[start_head & mask].burst_size;
}
@ -644,7 +642,7 @@ mlx5_flow_aso_alarm(void *arg)
us = US_PER_S;
sq->next = 0;
}
mlx5_aso_sq_enqueue_burst(sh->aso_age_mng, n);
mlx5_aso_sq_enqueue_burst(sh, n);
if (rte_eal_alarm_set(us, mlx5_flow_aso_alarm, sh))
DRV_LOG(ERR, "Cannot reinitialize aso alarm.");
}
@ -695,8 +693,9 @@ mlx5_aso_flow_hit_queue_poll_stop(struct mlx5_dev_ctx_shared *sh)
}
static uint16_t
mlx5_aso_mtr_sq_enqueue_single(struct mlx5_aso_sq *sq,
struct mlx5_aso_mtr *aso_mtr)
mlx5_aso_mtr_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
struct mlx5_aso_sq *sq,
struct mlx5_aso_mtr *aso_mtr)
{
volatile struct mlx5_aso_wqe *wqe = NULL;
struct mlx5_flow_meter_info *fm = NULL;
@ -774,11 +773,9 @@ mlx5_aso_mtr_sq_enqueue_single(struct mlx5_aso_sq *sq,
*/
sq->head++;
sq->pi += 2;/* Each WQE contains 2 WQEBB's. */
rte_io_wmb();
sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
rte_wmb();
*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
rte_wmb();
mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
!sh->tx_uar.dbnc);
rte_spinlock_unlock(&sq->sqsl);
return 1;
}
@ -871,7 +868,7 @@ mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
do {
mlx5_aso_mtr_completion_handle(sq);
if (mlx5_aso_mtr_sq_enqueue_single(sq, mtr))
if (mlx5_aso_mtr_sq_enqueue_single(sh, sq, mtr))
return 0;
/* Waiting for wqe resource. */
rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
@ -920,8 +917,8 @@ mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
/*
* Post a WQE to the ASO CT SQ to modify the context.
*
* @param[in] mng
* Pointer to the CT pools management structure.
* @param[in] sh
* Pointer to shared device context.
* @param[in] ct
* Pointer to the generic CT structure related to the context.
* @param[in] profile
@ -931,12 +928,12 @@ mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
* 1 on success (WQE number), 0 on failure.
*/
static uint16_t
mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
mlx5_aso_ct_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
struct mlx5_aso_ct_action *ct,
const struct rte_flow_action_conntrack *profile)
{
volatile struct mlx5_aso_wqe *wqe = NULL;
struct mlx5_aso_sq *sq = &mng->aso_sq;
struct mlx5_aso_sq *sq = &sh->ct_mng->aso_sq;
uint16_t size = 1 << sq->log_desc_n;
uint16_t mask = size - 1;
uint16_t res;
@ -1039,11 +1036,9 @@ mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
profile->reply_dir.max_ack);
sq->head++;
sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
rte_io_wmb();
sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
rte_wmb();
*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
rte_wmb();
mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
!sh->tx_uar.dbnc);
rte_spinlock_unlock(&sq->sqsl);
return 1;
}
@ -1084,8 +1079,8 @@ mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
/*
* Post a WQE to the ASO CT SQ to query the current context.
*
* @param[in] mng
* Pointer to the CT pools management structure.
* @param[in] sh
* Pointer to shared device context.
* @param[in] ct
* Pointer to the generic CT structure related to the context.
* @param[in] data
@ -1095,11 +1090,11 @@ mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
* 1 on success (WQE number), 0 on failure.
*/
static int
mlx5_aso_ct_sq_query_single(struct mlx5_aso_ct_pools_mng *mng,
mlx5_aso_ct_sq_query_single(struct mlx5_dev_ctx_shared *sh,
struct mlx5_aso_ct_action *ct, char *data)
{
volatile struct mlx5_aso_wqe *wqe = NULL;
struct mlx5_aso_sq *sq = &mng->aso_sq;
struct mlx5_aso_sq *sq = &sh->ct_mng->aso_sq;
uint16_t size = 1 << sq->log_desc_n;
uint16_t mask = size - 1;
uint16_t res;
@ -1154,11 +1149,9 @@ mlx5_aso_ct_sq_query_single(struct mlx5_aso_ct_pools_mng *mng,
* data segment is not used in this case.
*/
sq->pi += 2;
rte_io_wmb();
sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
rte_wmb();
*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
rte_wmb();
mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
!sh->tx_uar.dbnc);
rte_spinlock_unlock(&sq->sqsl);
return 1;
}
@ -1238,14 +1231,13 @@ mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
struct mlx5_aso_ct_action *ct,
const struct rte_flow_action_conntrack *profile)
{
struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
struct mlx5_aso_ct_pool *pool;
MLX5_ASSERT(ct);
do {
mlx5_aso_ct_completion_handle(mng);
if (mlx5_aso_ct_sq_enqueue_single(mng, ct, profile))
mlx5_aso_ct_completion_handle(sh->ct_mng);
if (mlx5_aso_ct_sq_enqueue_single(sh, ct, profile))
return 0;
/* Waiting for wqe resource. */
rte_delay_us_sleep(10u);
@ -1385,7 +1377,6 @@ mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
struct mlx5_aso_ct_action *ct,
struct rte_flow_action_conntrack *profile)
{
struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
struct mlx5_aso_ct_pool *pool;
char out_data[64 * 2];
@ -1393,8 +1384,8 @@ mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
MLX5_ASSERT(ct);
do {
mlx5_aso_ct_completion_handle(mng);
ret = mlx5_aso_ct_sq_query_single(mng, ct, out_data);
mlx5_aso_ct_completion_handle(sh->ct_mng);
ret = mlx5_aso_ct_sq_query_single(sh, ct, out_data);
if (ret < 0)
return ret;
else if (ret > 0)


@ -127,13 +127,9 @@ struct mlx5_rxq_data {
struct mlx5_rxq_stats stats;
rte_xmm_t mbuf_initializer; /* Default rearm/flags for vectorized Rx. */
struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */
void *cq_uar; /* Verbs CQ user access region. */
struct mlx5_uar_data uar_data; /* CQ doorbell. */
uint32_t cqn; /* CQ number. */
uint8_t cq_arm_sn; /* CQ arm seq number. */
#ifndef RTE_ARCH_64
rte_spinlock_t *uar_lock_cq;
/* CQ (UAR) access lock required for 32bit implementations */
#endif
uint32_t tunnel; /* Tunnel information. */
int timestamp_offset; /* Dynamic mbuf field for timestamp. */
uint64_t timestamp_rx_flag; /* Dynamic mbuf flag for timestamp. */


@ -21,11 +21,11 @@
#include <mlx5_glue.h>
#include <mlx5_malloc.h>
#include <mlx5_common.h>
#include <mlx5_common_mr.h>
#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_tx.h"
#include "mlx5_rx.h"
#include "mlx5_utils.h"
#include "mlx5_autoconf.h"
@ -1171,15 +1171,13 @@ mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
int sq_n = 0;
uint32_t doorbell_hi;
uint64_t doorbell;
void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL;
sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK);
doorbell = (uint64_t)doorbell_hi << 32;
doorbell |= rxq->cqn;
rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
mlx5_uar_write64(rte_cpu_to_be_64(doorbell),
cq_db_reg, rxq->uar_lock_cq);
mlx5_doorbell_ring(&rxq->uar_data, rte_cpu_to_be_64(doorbell),
doorbell_hi, &rxq->cq_db[MLX5_CQ_ARM_DB], 0);
}
/**
@ -1842,9 +1840,6 @@ mlx5_rxq_new(struct rte_eth_dev *dev, struct mlx5_rxq_priv *rxq,
(struct rte_mbuf *(*)[desc_n])(tmpl + 1);
tmpl->rxq.mprq_bufs =
(struct mlx5_mprq_buf *(*)[desc])(*tmpl->rxq.elts + desc_n);
#ifndef RTE_ARCH_64
tmpl->rxq.uar_lock_cq = &priv->sh->uar_lock_cq;
#endif
tmpl->rxq.idx = idx;
LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
return tmpl;


@ -14,6 +14,7 @@
#include <rte_common.h>
#include <rte_spinlock.h>
#include <mlx5_common.h>
#include <mlx5_common_mr.h>
#include "mlx5.h"
@ -160,10 +161,7 @@ struct mlx5_txq_data {
int32_t ts_offset; /* Timestamp field dynamic offset. */
struct mlx5_dev_ctx_shared *sh; /* Shared context. */
struct mlx5_txq_stats stats; /* TX queue counters. */
#ifndef RTE_ARCH_64
rte_spinlock_t *uar_lock;
/* UAR access lock required for 32bit implementations */
#endif
struct mlx5_uar_data uar_data;
struct rte_mbuf *elts[0];
/* Storage for queued packets, must be the last field. */
} __rte_cache_aligned;
@ -203,7 +201,6 @@ int mlx5_tx_hairpin_queue_setup
(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
const struct rte_eth_hairpin_conf *hairpin_conf);
void mlx5_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
void txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl, void *bf_reg);
int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
void mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev);
int mlx5_txq_obj_verify(struct rte_eth_dev *dev);
@ -288,92 +285,10 @@ MLX5_TXOFF_PRE_DECL(mci_mpw);
MLX5_TXOFF_PRE_DECL(mc_mpw);
MLX5_TXOFF_PRE_DECL(i_mpw);
static __rte_always_inline uint64_t *
static __rte_always_inline struct mlx5_uar_data *
mlx5_tx_bfreg(struct mlx5_txq_data *txq)
{
return MLX5_PROC_PRIV(txq->port_id)->uar_table[txq->idx];
}
/**
* Provide safe 64bit store operation to mlx5 UAR region for both 32bit and
* 64bit architectures.
*
* @param val
* value to write in CPU endian format.
* @param addr
* Address to write to.
* @param lock
* Address of the lock to use for that UAR access.
*/
static __rte_always_inline void
__mlx5_uar_write64_relaxed(uint64_t val, void *addr,
rte_spinlock_t *lock __rte_unused)
{
#ifdef RTE_ARCH_64
*(uint64_t *)addr = val;
#else /* !RTE_ARCH_64 */
rte_spinlock_lock(lock);
*(uint32_t *)addr = val;
rte_io_wmb();
*((uint32_t *)addr + 1) = val >> 32;
rte_spinlock_unlock(lock);
#endif
}
/**
* Provide safe 64bit store operation to mlx5 UAR region for both 32bit and
* 64bit architectures while guaranteeing the order of execution with the
* code being executed.
*
* @param val
* value to write in CPU endian format.
* @param addr
* Address to write to.
* @param lock
* Address of the lock to use for that UAR access.
*/
static __rte_always_inline void
__mlx5_uar_write64(uint64_t val, void *addr, rte_spinlock_t *lock)
{
rte_io_wmb();
__mlx5_uar_write64_relaxed(val, addr, lock);
}
/* Assist macros, used instead of directly calling the functions they wrap. */
#ifdef RTE_ARCH_64
#define mlx5_uar_write64_relaxed(val, dst, lock) \
__mlx5_uar_write64_relaxed(val, dst, NULL)
#define mlx5_uar_write64(val, dst, lock) __mlx5_uar_write64(val, dst, NULL)
#else
#define mlx5_uar_write64_relaxed(val, dst, lock) \
__mlx5_uar_write64_relaxed(val, dst, lock)
#define mlx5_uar_write64(val, dst, lock) __mlx5_uar_write64(val, dst, lock)
#endif
/**
* Ring TX queue doorbell and flush the update if requested.
*
* @param txq
* Pointer to TX queue structure.
* @param wqe
* Pointer to the last WQE posted in the NIC.
* @param cond
* Request for write memory barrier after BlueFlame update.
*/
static __rte_always_inline void
mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
int cond)
{
uint64_t *dst = mlx5_tx_bfreg(txq);
volatile uint64_t *src = ((volatile uint64_t *)wqe);
rte_io_wmb();
*txq->qp_db = rte_cpu_to_be_32(txq->wqe_ci);
/* Ensure ordering between DB record and BF copy. */
rte_wmb();
mlx5_uar_write64_relaxed(*src, dst, txq->uar_lock);
if (cond)
rte_wmb();
return &MLX5_PROC_PRIV(txq->port_id)->uar_table[txq->idx];
}
/**
@ -387,7 +302,8 @@ mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
static __rte_always_inline void
mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
{
mlx5_tx_dbrec_cond_wmb(txq, wqe, 1);
mlx5_doorbell_ring(mlx5_tx_bfreg(txq), *(volatile uint64_t *)wqe,
txq->wqe_ci, txq->qp_db, 1);
}
/**
@ -3660,8 +3576,10 @@ mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq,
* packets are coming and the write barrier will be issued on
* the next burst (after descriptor writing, at least).
*/
mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, !txq->db_nc &&
(!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST));
mlx5_doorbell_ring(mlx5_tx_bfreg(txq),
*(volatile uint64_t *)loc.wqe_last, txq->wqe_ci,
txq->qp_db, !txq->db_nc &&
(!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST));
/* Not all of the mbufs may be stored into elts yet. */
part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy;
if (!MLX5_TXOFF_CONFIG(INLINE) && part) {


@ -164,21 +164,14 @@ mlx5_txpp_doorbell_rearm_queue(struct mlx5_dev_ctx_shared *sh, uint16_t ci)
uint32_t w32[2];
uint64_t w64;
} cs;
void *reg_addr;
wq->sq_ci = ci + 1;
cs.w32[0] = rte_cpu_to_be_32(rte_be_to_cpu_32
(wqe[ci & (wq->sq_size - 1)].ctrl[0]) | (ci - 1) << 8);
cs.w32[1] = wqe[ci & (wq->sq_size - 1)].ctrl[1];
/* Update SQ doorbell record with new SQ ci. */
rte_compiler_barrier();
*wq->sq_obj.db_rec = rte_cpu_to_be_32(wq->sq_ci);
/* Make sure the doorbell record is updated. */
rte_wmb();
/* Write to doorbel register to start processing. */
reg_addr = mlx5_os_get_devx_uar_reg_addr(sh->tx_uar);
__mlx5_uar_write64_relaxed(cs.w64, reg_addr, NULL);
rte_wmb();
mlx5_doorbell_ring(&sh->tx_uar.bf_db, cs.w64, wq->sq_ci,
wq->sq_obj.db_rec, !sh->tx_uar.dbnc);
}
static void
@ -233,14 +226,15 @@ mlx5_txpp_create_rearm_queue(struct mlx5_dev_ctx_shared *sh)
.tis_num = sh->tis[0]->id,
.wq_attr = (struct mlx5_devx_wq_attr){
.pd = sh->cdev->pdn,
.uar_page = mlx5_os_get_devx_uar_page_id(sh->tx_uar),
.uar_page =
mlx5_os_get_devx_uar_page_id(sh->tx_uar.obj),
},
.ts_format = mlx5_ts_format_conv
(sh->cdev->config.hca_attr.sq_ts_format),
};
struct mlx5_devx_modify_sq_attr msq_attr = { 0 };
struct mlx5_devx_cq_attr cq_attr = {
.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->tx_uar),
.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->tx_uar.obj),
};
struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
int ret;
@ -394,7 +388,7 @@ mlx5_txpp_create_clock_queue(struct mlx5_dev_ctx_shared *sh)
struct mlx5_devx_cq_attr cq_attr = {
.use_first_only = 1,
.overrun_ignore = 1,
.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->tx_uar),
.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->tx_uar.obj),
};
struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
int ret;
@ -444,7 +438,7 @@ mlx5_txpp_create_clock_queue(struct mlx5_dev_ctx_shared *sh)
sq_attr.cqn = wq->cq_obj.cq->id;
sq_attr.packet_pacing_rate_limit_index = sh->txpp.pp_id;
sq_attr.wq_attr.cd_slave = 1;
sq_attr.wq_attr.uar_page = mlx5_os_get_devx_uar_page_id(sh->tx_uar);
sq_attr.wq_attr.uar_page = mlx5_os_get_devx_uar_page_id(sh->tx_uar.obj);
sq_attr.wq_attr.pd = sh->cdev->pdn;
sq_attr.ts_format =
mlx5_ts_format_conv(sh->cdev->config.hca_attr.sq_ts_format);
@ -479,26 +473,14 @@ mlx5_txpp_create_clock_queue(struct mlx5_dev_ctx_shared *sh)
static inline void
mlx5_txpp_cq_arm(struct mlx5_dev_ctx_shared *sh)
{
void *base_addr;
struct mlx5_txpp_wq *aq = &sh->txpp.rearm_queue;
uint32_t arm_sn = aq->arm_sn << MLX5_CQ_SQN_OFFSET;
uint32_t db_hi = arm_sn | MLX5_CQ_DBR_CMD_ALL | aq->cq_ci;
uint64_t db_be =
rte_cpu_to_be_64(((uint64_t)db_hi << 32) | aq->cq_obj.cq->id);
base_addr = mlx5_os_get_devx_uar_base_addr(sh->tx_uar);
uint32_t *addr = RTE_PTR_ADD(base_addr, MLX5_CQ_DOORBELL);
rte_compiler_barrier();
aq->cq_obj.db_rec[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(db_hi);
rte_wmb();
#ifdef RTE_ARCH_64
*(uint64_t *)addr = db_be;
#else
*(uint32_t *)addr = db_be;
rte_io_wmb();
*((uint32_t *)addr + 1) = db_be >> 32;
#endif
mlx5_doorbell_ring(&sh->tx_uar.cq_db, db_be, db_hi,
&aq->cq_obj.db_rec[MLX5_CQ_ARM_DB], 0);
aq->arm_sn++;
}


@ -494,66 +494,6 @@ mlx5_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
mlx5_txq_release(dev, qid);
}
/**
* Configure the doorbell register non-cached attribute.
*
* @param txq_ctrl
* Pointer to Tx queue control structure.
* @param page_size
* Systme page size
*/
static void
txq_uar_ncattr_init(struct mlx5_txq_ctrl *txq_ctrl, size_t page_size)
{
struct mlx5_common_device *cdev = txq_ctrl->priv->sh->cdev;
off_t cmd;
txq_ctrl->txq.db_heu = cdev->config.dbnc == MLX5_TXDB_HEURISTIC;
txq_ctrl->txq.db_nc = 0;
/* Check the doorbell register mapping type. */
cmd = txq_ctrl->uar_mmap_offset / page_size;
cmd >>= MLX5_UAR_MMAP_CMD_SHIFT;
cmd &= MLX5_UAR_MMAP_CMD_MASK;
if (cmd == MLX5_MMAP_GET_NC_PAGES_CMD)
txq_ctrl->txq.db_nc = 1;
}
/**
* Initialize Tx UAR registers for primary process.
*
* @param txq_ctrl
* Pointer to Tx queue control structure.
* @param bf_reg
* BlueFlame register from Verbs UAR.
*/
void
txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl, void *bf_reg)
{
struct mlx5_priv *priv = txq_ctrl->priv;
struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
#ifndef RTE_ARCH_64
unsigned int lock_idx;
#endif
const size_t page_size = rte_mem_page_size();
if (page_size == (size_t)-1) {
DRV_LOG(ERR, "Failed to get mem page size");
rte_errno = ENOMEM;
}
if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
return;
MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
MLX5_ASSERT(ppriv);
ppriv->uar_table[txq_ctrl->txq.idx] = bf_reg;
txq_uar_ncattr_init(txq_ctrl, page_size);
#ifndef RTE_ARCH_64
/* Assign an UAR lock according to UAR page number */
lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
MLX5_UAR_PAGE_NUM_MASK;
txq_ctrl->txq.uar_lock = &priv->sh->uar_lock[lock_idx];
#endif
}
/**
* Remap UAR register of a Tx queue for secondary process.
*
@ -592,7 +532,7 @@ txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd)
* As rdma-core, UARs are mapped in size of OS page
* size. Ref to libmlx5 function: mlx5_init_context()
*/
uar_va = (uintptr_t)primary_ppriv->uar_table[txq->idx];
uar_va = (uintptr_t)primary_ppriv->uar_table[txq->idx].db;
offset = uar_va & (page_size - 1); /* Offset in page. */
addr = rte_mem_map(NULL, page_size, RTE_PROT_WRITE, RTE_MAP_SHARED,
fd, txq_ctrl->uar_mmap_offset);
@ -603,7 +543,11 @@ txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd)
return -rte_errno;
}
addr = RTE_PTR_ADD(addr, offset);
ppriv->uar_table[txq->idx] = addr;
ppriv->uar_table[txq->idx].db = addr;
#ifndef RTE_ARCH_64
ppriv->uar_table[txq->idx].sl_p =
primary_ppriv->uar_table[txq->idx].sl_p;
#endif
return 0;
}
@ -626,7 +570,7 @@ txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl)
if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
return;
addr = ppriv->uar_table[txq_ctrl->txq.idx];
addr = ppriv->uar_table[txq_ctrl->txq.idx].db;
rte_mem_unmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
}
@ -651,9 +595,9 @@ mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev)
}
MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY);
for (i = 0; i != ppriv->uar_table_sz; ++i) {
if (!ppriv->uar_table[i])
if (!ppriv->uar_table[i].db)
continue;
addr = ppriv->uar_table[i];
addr = ppriv->uar_table[i].db;
rte_mem_unmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
}


@ -72,6 +72,7 @@ mlx5_regex_dev_probe(struct mlx5_common_device *cdev)
struct mlx5_regex_priv *priv = NULL;
struct mlx5_hca_attr *attr = &cdev->config.hca_attr;
char name[RTE_REGEXDEV_NAME_MAX_LEN];
int ret;
if ((!attr->regexp_params && !attr->mmo_regex_sq_en && !attr->mmo_regex_qp_en)
|| attr->regexp_num_of_engines == 0) {
@ -102,17 +103,9 @@ mlx5_regex_dev_probe(struct mlx5_common_device *cdev)
rte_errno = rte_errno ? rte_errno : EINVAL;
goto dev_error;
}
/*
* This PMD always claims the write memory barrier on UAR
* registers writings, it is safe to allocate UAR with any
* memory mapping type.
*/
priv->uar = mlx5_devx_alloc_uar(priv->cdev);
if (!priv->uar) {
DRV_LOG(ERR, "can't allocate uar.");
rte_errno = ENOMEM;
ret = mlx5_devx_uar_prepare(cdev, &priv->uar);
if (ret)
goto error;
}
priv->regexdev->dev_ops = &mlx5_regexdev_ops;
priv->regexdev->enqueue = mlx5_regexdev_enqueue;
#ifdef HAVE_MLX5_UMR_IMKEY
@ -131,8 +124,6 @@ mlx5_regex_dev_probe(struct mlx5_common_device *cdev)
return 0;
error:
if (priv->uar)
mlx5_glue->devx_free_uar(priv->uar);
if (priv->regexdev)
rte_regexdev_unregister(priv->regexdev);
dev_error:
@ -154,8 +145,7 @@ mlx5_regex_dev_remove(struct mlx5_common_device *cdev)
return 0;
priv = dev->data->dev_private;
if (priv) {
if (priv->uar)
mlx5_glue->devx_free_uar(priv->uar);
mlx5_devx_uar_release(&priv->uar);
if (priv->regexdev)
rte_regexdev_unregister(priv->regexdev);
rte_free(priv);


@ -55,7 +55,7 @@ struct mlx5_regex_priv {
uint16_t nb_max_matches; /* Max number of matches. */
enum mlx5_rxp_program_mode prog_mode;
uint32_t nb_engines; /* Number of RegEx engines. */
struct mlx5dv_devx_uar *uar; /* UAR object. */
struct mlx5_uar uar; /* UAR object. */
uint8_t is_bf2; /* The device is BF2 device. */
uint8_t has_umr; /* The device supports UMR. */
uint32_t mmo_regex_qp_cap:1;


@ -77,7 +77,7 @@ static int
regex_ctrl_create_cq(struct mlx5_regex_priv *priv, struct mlx5_regex_cq *cq)
{
struct mlx5_devx_cq_attr attr = {
.uar_page_id = priv->uar->page_id,
.uar_page_id = mlx5_os_get_devx_uar_page_id(priv->uar.obj),
};
int ret;
@ -136,7 +136,7 @@ regex_ctrl_create_hw_qp(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
struct mlx5_devx_qp_attr attr = {
.cqn = qp->cq.cq_obj.cq->id,
.uar_index = priv->uar->page_id,
.uar_index = mlx5_os_get_devx_uar_page_id(priv->uar.obj),
.pd = priv->cdev->pdn,
.ts_format = mlx5_ts_format_conv
(priv->cdev->config.hca_attr.qp_ts_format),


@ -168,24 +168,20 @@ prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
}
static inline void
send_doorbell(struct mlx5_regex_priv *priv, struct mlx5_regex_hw_qp *qp_obj)
send_doorbell(struct mlx5_regex_priv *priv, struct mlx5_regex_hw_qp *qp)
{
struct mlx5dv_devx_uar *uar = priv->uar;
size_t wqe_offset = (qp_obj->db_pi & (qp_size_get(qp_obj) - 1)) *
(MLX5_SEND_WQE_BB << (priv->has_umr ? 2 : 0)) +
(priv->has_umr ? MLX5_REGEX_UMR_WQE_SIZE : 0);
uint8_t *wqe = (uint8_t *)(uintptr_t)qp_obj->qp_obj.wqes + wqe_offset;
size_t wqe_offset = (qp->db_pi & (qp_size_get(qp) - 1)) *
(MLX5_SEND_WQE_BB << (priv->has_umr ? 2 : 0)) +
(priv->has_umr ? MLX5_REGEX_UMR_WQE_SIZE : 0);
uint8_t *wqe = (uint8_t *)(uintptr_t)qp->qp_obj.wqes + wqe_offset;
uint32_t actual_pi = (priv->has_umr ? (qp->db_pi * 4 + 3) : qp->db_pi) &
MLX5_REGEX_MAX_WQE_INDEX;
/* Or the fm_ce_se instead of set, avoid the fence be cleared. */
((struct mlx5_wqe_ctrl_seg *)wqe)->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;
uint64_t *doorbell_addr =
(uint64_t *)((uint8_t *)uar->base_addr + 0x800);
rte_io_wmb();
qp_obj->qp_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32((priv->has_umr ?
(qp_obj->db_pi * 4 + 3) : qp_obj->db_pi)
& MLX5_REGEX_MAX_WQE_INDEX);
rte_wmb();
*doorbell_addr = *(volatile uint64_t *)wqe;
rte_wmb();
mlx5_doorbell_ring(&priv->uar.bf_db, *(volatile uint64_t *)wqe,
actual_pi, &qp->qp_obj.db_rec[MLX5_SND_DBR],
!priv->uar.dbnc);
}
static inline int


@ -136,7 +136,7 @@ struct mlx5_vdpa_priv {
struct rte_vhost_memory *vmem;
struct mlx5dv_devx_event_channel *eventc;
struct mlx5dv_devx_event_channel *err_chnl;
struct mlx5dv_devx_uar *uar;
struct mlx5_uar uar;
struct rte_intr_handle *err_intr_handle;
struct mlx5_devx_obj *td;
struct mlx5_devx_obj *tiss[16]; /* TIS list for each LAG port. */


@ -30,10 +30,7 @@
void
mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
{
if (priv->uar) {
mlx5_glue->devx_free_uar(priv->uar);
priv->uar = NULL;
}
mlx5_devx_uar_release(&priv->uar);
#ifdef HAVE_IBV_DEVX_EVENT
if (priv->eventc) {
mlx5_os_devx_destroy_event_channel(priv->eventc);
@ -56,14 +53,7 @@ mlx5_vdpa_event_qp_global_prepare(struct mlx5_vdpa_priv *priv)
rte_errno);
goto error;
}
/*
* This PMD always claims the write memory barrier on UAR
* registers writings, it is safe to allocate UAR with any
* memory mapping type.
*/
priv->uar = mlx5_devx_alloc_uar(priv->cdev);
if (!priv->uar) {
rte_errno = errno;
if (mlx5_devx_uar_prepare(priv->cdev, &priv->uar) != 0) {
DRV_LOG(ERR, "Failed to allocate UAR.");
goto error;
}
@ -88,18 +78,9 @@ mlx5_vdpa_cq_arm(struct mlx5_vdpa_priv *priv, struct mlx5_vdpa_cq *cq)
uint32_t doorbell_hi = arm_sn | MLX5_CQ_DBR_CMD_ALL | cq_ci;
uint64_t doorbell = ((uint64_t)doorbell_hi << 32) | cq->cq_obj.cq->id;
uint64_t db_be = rte_cpu_to_be_64(doorbell);
uint32_t *addr = RTE_PTR_ADD(priv->uar->base_addr, MLX5_CQ_DOORBELL);
rte_io_wmb();
cq->cq_obj.db_rec[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
rte_wmb();
#ifdef RTE_ARCH_64
*(uint64_t *)addr = db_be;
#else
*(uint32_t *)addr = db_be;
rte_io_wmb();
*((uint32_t *)addr + 1) = db_be >> 32;
#endif
mlx5_doorbell_ring(&priv->uar.cq_db, db_be, doorbell_hi,
&cq->cq_obj.db_rec[MLX5_CQ_ARM_DB], 0);
cq->arm_sn++;
cq->armed = 1;
}
@ -110,7 +91,7 @@ mlx5_vdpa_cq_create(struct mlx5_vdpa_priv *priv, uint16_t log_desc_n,
{
struct mlx5_devx_cq_attr attr = {
.use_first_only = 1,
.uar_page_id = priv->uar->page_id,
.uar_page_id = mlx5_os_get_devx_uar_page_id(priv->uar.obj),
};
uint16_t event_nums[1] = {0};
int ret;
@ -606,7 +587,7 @@ mlx5_vdpa_event_qp_create(struct mlx5_vdpa_priv *priv, uint16_t desc_n,
DRV_LOG(ERR, "Failed to create FW QP(%u).", rte_errno);
goto error;
}
attr.uar_index = priv->uar->page_id;
attr.uar_index = mlx5_os_get_devx_uar_page_id(priv->uar.obj);
attr.cqn = eqp->cq.cq_obj.cq->id;
attr.rq_size = RTE_BIT32(log_desc_n);
attr.log_rq_stride = rte_log2_u32(MLX5_WSEG_SIZE);