net/mlx5: map UAR address around huge pages
Reserving the memory space for the UAR near the huge pages helps to
reduce the cases where the secondary process cannot start. Because the
UAR pages are physical pages, they must be mapped at the same virtual
address as in the primary process for the secondary process to work.

As this remap is almost the last mapping performed by the processes
(libraries, heaps and stacks are already loaded), similar to huge
pages, there is no guarantee this mechanism will always work.

Signed-off-by: Xueming Li <xuemingl@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
commit 4a98415343
parent 25f28d9d29
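The whole patch is built on one mmap pattern: reserve a window of virtual
address space with an anonymous PROT_NONE mapping, which commits no physical
memory, then later map real pages into that window with MAP_FIXED. A minimal
standalone sketch of the pattern (sizes and names are illustrative, not taken
from the driver):

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define RESERVE_SIZE (1UL << 20)	/* 1 MiB window, example only. */

int main(void)
{
	size_t page_size = (size_t)sysconf(_SC_PAGESIZE);
	/* Reservation: PROT_NONE + anonymous commits no physical memory. */
	void *base = mmap(NULL, RESERVE_SIZE, PROT_NONE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (base == MAP_FAILED) {
		perror("mmap(reserve)");
		return 1;
	}
	/* Later, map a real page at a fixed address inside the window;
	 * with MAP_FIXED the call returns exactly this address or fails.
	 */
	void *page = mmap(base, page_size, PROT_READ | PROT_WRITE,
			  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (page == MAP_FAILED) {
		perror("mmap(fixed)");
		return 1;
	}
	printf("reserved %p, fixed-mapped a page at %p\n", base, page);
	munmap(base, RESERVE_SIZE);
	return 0;
}

In the driver, the fixed mapping targets the device's doorbell pages through a
Verbs file descriptor instead of anonymous memory, but the reservation step is
the same.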
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -39,6 +39,7 @@
 #include <stdlib.h>
 #include <errno.h>
 #include <net/if.h>
+#include <sys/mman.h>
 
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -56,6 +57,7 @@
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
 #include <rte_common.h>
+#include <rte_eal_memconfig.h>
 #include <rte_kvargs.h>
 
 #include "mlx5.h"
@@ -478,6 +480,106 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
 
 static struct rte_pci_driver mlx5_driver;
 
+/*
+ * Reserved UAR address space for TXQ UAR(hw doorbell) mapping, process
+ * local resource used by both primary and secondary to avoid duplicate
+ * reservation.
+ * The space has to be available on both primary and secondary process,
+ * TXQ UAR maps to this area using fixed mmap w/o double check.
+ */
+static void *uar_base;
+
+/**
+ * Reserve UAR address space for primary process.
+ *
+ * @param[in] priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+priv_uar_init_primary(struct priv *priv)
+{
+	void *addr = (void *)0;
+	int i;
+	const struct rte_mem_config *mcfg;
+	int ret;
+
+	if (uar_base) { /* UAR address space mapped. */
+		priv->uar_base = uar_base;
+		return 0;
+	}
+	/* find out lower bound of hugepage segments */
+	mcfg = rte_eal_get_configuration()->mem_config;
+	for (i = 0; i < RTE_MAX_MEMSEG && mcfg->memseg[i].addr; i++) {
+		if (addr)
+			addr = RTE_MIN(addr, mcfg->memseg[i].addr);
+		else
+			addr = mcfg->memseg[i].addr;
+	}
+	/* keep distance to hugepages to minimize potential conflicts. */
+	addr = RTE_PTR_SUB(addr, MLX5_UAR_OFFSET + MLX5_UAR_SIZE);
+	/* anonymous mmap, no real memory consumption. */
+	addr = mmap(addr, MLX5_UAR_SIZE,
+		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (addr == MAP_FAILED) {
+		ERROR("Failed to reserve UAR address space, please adjust "
+		      "MLX5_UAR_SIZE or try --base-virtaddr");
+		ret = ENOMEM;
+		return ret;
+	}
+	/* Accept either same addr or a new addr returned from mmap if target
+	 * range occupied.
+	 */
+	INFO("Reserved UAR address space: %p", addr);
+	priv->uar_base = addr; /* for primary and secondary UAR re-mmap. */
+	uar_base = addr; /* process local, don't reserve again. */
+	return 0;
+}
+
+/**
+ * Reserve UAR address space for secondary process, align with
+ * primary process.
+ *
+ * @param[in] priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+priv_uar_init_secondary(struct priv *priv)
+{
+	void *addr;
+	int ret;
+
+	assert(priv->uar_base);
+	if (uar_base) { /* already reserved. */
+		assert(uar_base == priv->uar_base);
+		return 0;
+	}
+	/* anonymous mmap, no real memory consumption. */
+	addr = mmap(priv->uar_base, MLX5_UAR_SIZE,
+		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (addr == MAP_FAILED) {
+		ERROR("UAR mmap failed: %p size: %llu",
+		      priv->uar_base, MLX5_UAR_SIZE);
+		ret = ENXIO;
+		return ret;
+	}
+	if (priv->uar_base != addr) {
+		ERROR("UAR address %p size %llu occupied, please adjust "
+		      "MLX5_UAR_OFFSET or try EAL parameter --base-virtaddr",
+		      priv->uar_base, MLX5_UAR_SIZE);
+		ret = ENXIO;
+		return ret;
+	}
+	uar_base = addr; /* process local, don't reserve again */
+	INFO("Reserved UAR address space: %p", addr);
+	return 0;
+}
+
 /**
  * DPDK callback to register a PCI device.
  *
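Note that uar_base is deliberately a process-local static rather than a
per-port field: the first mlx5 port probed in a process reserves the window,
and every later port, as well as priv_uar_init_secondary() in secondary
processes, reuses it, so the reservation happens at most once per process.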
@@ -663,6 +765,11 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		eth_dev->device = &pci_dev->device;
 		eth_dev->dev_ops = &mlx5_dev_sec_ops;
 		priv = eth_dev->data->dev_private;
+		err = priv_uar_init_secondary(priv);
+		if (err < 0) {
+			err = -err;
+			goto error;
+		}
 		/* Receive command fd from primary process */
 		err = priv_socket_connect(priv);
 		if (err < 0) {
@@ -671,10 +778,8 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		}
 		/* Remap UAR for Tx queues. */
 		err = priv_tx_uar_remap(priv, err);
-		if (err < 0) {
-			err = -err;
+		if (err)
 			goto error;
-		}
 		/*
 		 * Ethdev pointer is still required as input since
 		 * the primary device is not accessible from the
@@ -820,6 +925,9 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 			WARN("Rx CQE compression isn't supported");
 			config.cqe_comp = 0;
 		}
+		err = priv_uar_init_primary(priv);
+		if (err)
+			goto port_error;
 		/* Configure the first MAC address by default. */
 		if (priv_get_mac(priv, &mac.addr_bytes)) {
 			ERROR("cannot get MAC address, is mlx5_en loaded?"
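The two init functions are intentionally asymmetric: the primary passes a
computed hint and accepts whatever address mmap returns, while the secondary
passes the primary's address and must get exactly that address back, otherwise
it fails with ENXIO. A small sketch of this hint-then-verify behaviour,
assuming Linux mmap semantics (address and size are made up):

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t sz = 1UL << 20;
	void *hint = (void *)0x700000000000UL;	/* made-up address */
	/* Primary style: hint only, any returned address is accepted. */
	void *got = mmap(hint, sz, PROT_NONE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (got == MAP_FAILED)
		return 1;
	printf("primary: hinted %p, got %p (both acceptable)\n", hint, got);
	/* Secondary style: the same address again; the range is now
	 * occupied (here by our own first mapping), so the kernel moves
	 * the mapping, which the driver treats as a fatal mismatch.
	 */
	void *again = mmap(got, sz, PROT_NONE,
			   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (again != got)
		printf("secondary: %p occupied, mapping moved to %p\n",
		       got, again);
	munmap(got, sz);
	if (again != MAP_FAILED && again != got)
		munmap(again, sz);
	return 0;
}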
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -179,6 +179,7 @@ struct priv {
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
 	rte_spinlock_t lock; /* Lock for control functions. */
 	int primary_socket; /* Unix socket for primary process. */
+	void *uar_base; /* Reserved address space for UAR mapping */
 	struct rte_intr_handle intr_handle_socket; /* Interrupt handler. */
 	struct mlx5_dev_config config; /* Device configuration. */
 	struct mlx5_verbs_alloc_ctx verbs_alloc_ctx;
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -113,4 +113,14 @@
 /* Maximum number of attempts to query link status before giving up. */
 #define MLX5_MAX_LINK_QUERY_ATTEMPTS 5
 
+/* Reserved address space for UAR mapping. */
+#define MLX5_UAR_SIZE (1ULL << 32)
+
+/* Offset of reserved UAR address space to hugepage memory. The offset is used
+ * here to minimize the chance that an address next to the hugepages is used by
+ * other code in either the primary or the secondary process; failing to map a
+ * TX UAR would make TX packets invisible to HW.
+ */
+#define MLX5_UAR_OFFSET (1ULL << 32)
+
 #endif /* RTE_PMD_MLX5_DEFS_H_ */
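With both constants at 4 GiB, the address computed in priv_uar_init_primary()
places the reserved window one MLX5_UAR_OFFSET below the lowest hugepage
segment. A worked example of the layout arithmetic, using a made-up hugepage
base address:

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	uint64_t huge_lo = 0x7f0000000000ULL; /* hypothetical lowest hugepage VA */
	uint64_t uar_size = 1ULL << 32;       /* MLX5_UAR_SIZE */
	uint64_t uar_off = 1ULL << 32;        /* MLX5_UAR_OFFSET */
	uint64_t uar_lo = huge_lo - (uar_off + uar_size);

	/* The window [uar_lo, uar_lo + uar_size) is reserved; a further
	 * 4 GiB gap separates its end from the hugepage area.
	 */
	printf("UAR window [%#" PRIx64 ", %#" PRIx64 "), gap %#" PRIx64 "\n",
	       uar_lo, uar_lo + uar_size, huge_lo - (uar_lo + uar_size));
	return 0;
}

For the example address this prints a window of [0x7efe00000000,
0x7eff00000000) with a 0x100000000 (4 GiB) gap up to the hugepages.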
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -205,7 +205,7 @@ struct mlx5_txq_data {
 	volatile void *wqes; /* Work queue (use volatile to write into). */
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
-	volatile void *bf_reg; /* Blueflame register. */
+	volatile void *bf_reg; /* Blueflame register remapped. */
 	struct mlx5_mr *mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MR translation table. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
@@ -230,6 +230,7 @@ struct mlx5_txq_ctrl {
 	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
 	struct mlx5_txq_data txq; /* Data path structure. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
+	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
 };
 
 /* mlx5_rxq.c */
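Keeping both pointers is deliberate: the data path keeps writing doorbells
through the remapped txq.bf_reg, while bf_reg_orig preserves the address Verbs
handed out, which the remap loop below needs in order to compute the in-page
offset and to detect queues sharing the same UAR page.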
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -76,10 +76,13 @@ priv_txq_start(struct priv *priv)
 			goto error;
 		}
 	}
-	return -ret;
+	ret = priv_tx_uar_remap(priv, priv->ctx->cmd_fd);
+	if (ret)
+		goto error;
+	return ret;
 error:
 	priv_txq_stop(priv);
-	return -ret;
+	return ret;
 }
 
 static void
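With this change the primary process performs its own fixed remap at device
start, inside priv_txq_start(), once the Tx queues and their uar_mmap_offset
exist, using the Verbs command file descriptor priv->ctx->cmd_fd; before the
patch the fixed remap only ran in the secondary probe path.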
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -288,7 +288,9 @@ mlx5_tx_queue_release(void *dpdk_txq)
 
 
 /**
- * Map locally UAR used in Tx queues for BlueFlame doorbell.
+ * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
+ * Both primary and secondary processes do the mmap so that the UAR
+ * address is identical in both.
  *
  * @param[in] priv
  *   Pointer to private structure.
@@ -305,11 +307,14 @@ priv_tx_uar_remap(struct priv *priv, int fd)
 	uintptr_t pages[priv->txqs_n];
 	unsigned int pages_n = 0;
 	uintptr_t uar_va;
+	uintptr_t off;
 	void *addr;
+	void *ret;
 	struct mlx5_txq_data *txq;
 	struct mlx5_txq_ctrl *txq_ctrl;
 	int already_mapped;
 	size_t page_size = sysconf(_SC_PAGESIZE);
+	int r;
 
 	memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
 	/*
@@ -320,8 +325,10 @@ priv_tx_uar_remap(struct priv *priv, int fd)
 	for (i = 0; i != priv->txqs_n; ++i) {
 		txq = (*priv->txqs)[i];
 		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
-		uar_va = (uintptr_t)txq_ctrl->txq.bf_reg;
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size);
+		/* UAR addr from verbs, used to find dup and offset in page. */
+		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
+		off = uar_va & (page_size - 1); /* offset in page. */
+		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
 		already_mapped = 0;
 		for (j = 0; j != pages_n; ++j) {
 			if (pages[j] == uar_va) {
@@ -329,16 +336,30 @@ priv_tx_uar_remap(struct priv *priv, int fd)
 				break;
 			}
 		}
-		if (already_mapped)
-			continue;
-		pages[pages_n++] = uar_va;
-		addr = mmap((void *)uar_va, page_size,
-			    PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-			    txq_ctrl->uar_mmap_offset);
-		if (addr != (void *)uar_va) {
-			ERROR("call to mmap failed on UAR for txq %d\n", i);
-			return -1;
+		/* new address in reserved UAR address space. */
+		addr = RTE_PTR_ADD(priv->uar_base,
+				   uar_va & (MLX5_UAR_SIZE - 1));
+		if (!already_mapped) {
+			pages[pages_n++] = uar_va;
+			/* fixed mmap to specified address in reserved
+			 * address space.
+			 */
+			ret = mmap(addr, page_size,
+				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
+				   txq_ctrl->uar_mmap_offset);
+			if (ret != addr) {
+				/* fixed mmap has to return same address. */
+				ERROR("call to mmap failed on UAR for txq %d\n",
+				      i);
+				r = ENXIO;
+				return r;
+			}
 		}
+		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once */
+			txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr, off);
+		else
+			assert(txq_ctrl->txq.bf_reg ==
+			       RTE_PTR_ADD((void *)addr, off));
 	}
 	return 0;
 }
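The loop above splits each original doorbell address into a page base, used
for mmap and duplicate detection, and an in-page offset, re-applied on top of
the remapped page. A standalone sketch of that split with a made-up address:

#include <inttypes.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uintptr_t page_size = (uintptr_t)sysconf(_SC_PAGESIZE);
	uintptr_t uar_va = 0x7f1234567890UL;		/* made-up doorbell VA */
	uintptr_t off = uar_va & (page_size - 1);	/* offset in page */
	uintptr_t page = uar_va & ~(page_size - 1);	/* RTE_ALIGN_FLOOR */

	printf("page %#" PRIxPTR " + off %#" PRIxPTR " = %#" PRIxPTR "\n",
	       page, off, page + off);
	return 0;
}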
@@ -505,7 +526,7 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx)
 	txq_data->wqes = qp.sq.buf;
 	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
 	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
-	txq_data->bf_reg = qp.bf.reg;
+	txq_ctrl->bf_reg_orig = qp.bf.reg;
 	txq_data->cq_db = cq_info.dbrec;
 	txq_data->cqes =
 		(volatile struct mlx5_cqe (*)[])
@@ -840,6 +861,7 @@ mlx5_priv_txq_release(struct priv *priv, uint16_t idx)
 {
 	unsigned int i;
 	struct mlx5_txq_ctrl *txq;
+	size_t page_size = sysconf(_SC_PAGESIZE);
 
 	if (!(*priv->txqs)[idx])
 		return 0;
@@ -859,6 +881,9 @@ mlx5_priv_txq_release(struct priv *priv, uint16_t idx)
 			txq->txq.mp2mr[i] = NULL;
 		}
 	}
+	if (priv->uar_base)
+		munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg,
+		       page_size), page_size);
 	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
 		txq_free_elts(txq);
 		LIST_REMOVE(txq, next);