From 0dc02cca8b40b5b5190c2679058672a65ab7218d Mon Sep 17 00:00:00 2001 From: Adrien Mazarguil Date: Thu, 3 Mar 2016 15:27:17 +0100 Subject: [PATCH] mlx: use aligned memory to register regions The first and last memory pool elements are usually cache-aligned but not page-aligned, particularly when using huge pages. Hardware performance can be improved significantly by registering memory regions starting and ending on page boundaries. Signed-off-by: Adrien Mazarguil --- drivers/net/mlx4/mlx4.c | 58 ++++++++++++++++++++++++++++++------ drivers/net/mlx5/mlx5_rxq.c | 6 +--- drivers/net/mlx5/mlx5_rxtx.c | 52 +++++++++++++++++++++++++++++--- drivers/net/mlx5/mlx5_rxtx.h | 1 + 4 files changed, 99 insertions(+), 18 deletions(-) diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c index 0ec66ec818..cc4e9aa31c 100644 --- a/drivers/net/mlx4/mlx4.c +++ b/drivers/net/mlx4/mlx4.c @@ -86,6 +86,7 @@ #include #include #include +#include #ifdef PEDANTIC #pragma GCC diagnostic error "-pedantic" #endif @@ -1183,6 +1184,52 @@ txq_complete(struct txq *txq) return 0; } +/* For best performance, this function should not be inlined. */ +static struct ibv_mr *mlx4_mp2mr(struct ibv_pd *, const struct rte_mempool *) + __attribute__((noinline)); + +/** + * Register mempool as a memory region. + * + * @param pd + * Pointer to protection domain. + * @param mp + * Pointer to memory pool. + * + * @return + * Memory region pointer, NULL in case of error. + */ +static struct ibv_mr * +mlx4_mp2mr(struct ibv_pd *pd, const struct rte_mempool *mp) +{ + const struct rte_memseg *ms = rte_eal_get_physmem_layout(); + uintptr_t start = mp->elt_va_start; + uintptr_t end = mp->elt_va_end; + unsigned int i; + + DEBUG("mempool %p area start=%p end=%p size=%zu", + (const void *)mp, (void *)start, (void *)end, + (size_t)(end - start)); + /* Round start and end to page boundary if found in memory segments. */ + for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) { + uintptr_t addr = (uintptr_t)ms[i].addr; + size_t len = ms[i].len; + unsigned int align = ms[i].hugepage_sz; + + if ((start > addr) && (start < addr + len)) + start = RTE_ALIGN_FLOOR(start, align); + if ((end > addr) && (end < addr + len)) + end = RTE_ALIGN_CEIL(end, align); + } + DEBUG("mempool %p using start=%p end=%p size=%zu for MR", + (const void *)mp, (void *)start, (void *)end, + (size_t)(end - start)); + return ibv_reg_mr(pd, + (void *)start, + end - start, + IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); +} + /** * Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which * the cloned mbuf is allocated is returned instead. @@ -1234,10 +1281,7 @@ txq_mp2mr(struct txq *txq, const struct rte_mempool *mp) /* Add a new entry, register MR first. */ DEBUG("%p: discovered new memory pool \"%s\" (%p)", (void *)txq, mp->name, (const void *)mp); - mr = ibv_reg_mr(txq->priv->pd, - (void *)mp->elt_va_start, - (mp->elt_va_end - mp->elt_va_start), - (IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE)); + mr = mlx4_mp2mr(txq->priv->pd, mp); if (unlikely(mr == NULL)) { DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.", (void *)txq); @@ -3719,11 +3763,7 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc, DEBUG("%p: %s scattered packets support (%u WRs)", (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc); /* Use the entire RX mempool as the memory region. */ - tmpl.mr = ibv_reg_mr(priv->pd, - (void *)mp->elt_va_start, - (mp->elt_va_end - mp->elt_va_start), - (IBV_ACCESS_LOCAL_WRITE | - IBV_ACCESS_REMOTE_WRITE)); + tmpl.mr = mlx4_mp2mr(priv->pd, mp); if (tmpl.mr == NULL) { ret = EINVAL; ERROR("%p: MR creation failure: %s", diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c index 55d002ee9d..0f5ac65bf6 100644 --- a/drivers/net/mlx5/mlx5_rxq.c +++ b/drivers/net/mlx5/mlx5_rxq.c @@ -1190,11 +1190,7 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc, DEBUG("%p: %s scattered packets support (%u WRs)", (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc); /* Use the entire RX mempool as the memory region. */ - tmpl.mr = ibv_reg_mr(priv->pd, - (void *)mp->elt_va_start, - (mp->elt_va_end - mp->elt_va_start), - (IBV_ACCESS_LOCAL_WRITE | - IBV_ACCESS_REMOTE_WRITE)); + tmpl.mr = mlx5_mp2mr(priv->pd, mp); if (tmpl.mr == NULL) { ret = EINVAL; ERROR("%p: MR creation failure: %s", diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index 622ac17f01..4c53c7a422 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -55,6 +55,7 @@ #include #include #include +#include #ifdef PEDANTIC #pragma GCC diagnostic error "-pedantic" #endif @@ -135,6 +136,52 @@ txq_complete(struct txq *txq) return 0; } +/* For best performance, this function should not be inlined. */ +struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, const struct rte_mempool *) + __attribute__((noinline)); + +/** + * Register mempool as a memory region. + * + * @param pd + * Pointer to protection domain. + * @param mp + * Pointer to memory pool. + * + * @return + * Memory region pointer, NULL in case of error. + */ +struct ibv_mr * +mlx5_mp2mr(struct ibv_pd *pd, const struct rte_mempool *mp) +{ + const struct rte_memseg *ms = rte_eal_get_physmem_layout(); + uintptr_t start = mp->elt_va_start; + uintptr_t end = mp->elt_va_end; + unsigned int i; + + DEBUG("mempool %p area start=%p end=%p size=%zu", + (const void *)mp, (void *)start, (void *)end, + (size_t)(end - start)); + /* Round start and end to page boundary if found in memory segments. */ + for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) { + uintptr_t addr = (uintptr_t)ms[i].addr; + size_t len = ms[i].len; + unsigned int align = ms[i].hugepage_sz; + + if ((start > addr) && (start < addr + len)) + start = RTE_ALIGN_FLOOR(start, align); + if ((end > addr) && (end < addr + len)) + end = RTE_ALIGN_CEIL(end, align); + } + DEBUG("mempool %p using start=%p end=%p size=%zu for MR", + (const void *)mp, (void *)start, (void *)end, + (size_t)(end - start)); + return ibv_reg_mr(pd, + (void *)start, + end - start, + IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); +} + /** * Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which * the cloned mbuf is allocated is returned instead. @@ -186,10 +233,7 @@ txq_mp2mr(struct txq *txq, const struct rte_mempool *mp) /* Add a new entry, register MR first. */ DEBUG("%p: discovered new memory pool \"%s\" (%p)", (void *)txq, mp->name, (const void *)mp); - mr = ibv_reg_mr(txq->priv->pd, - (void *)mp->elt_va_start, - (mp->elt_va_end - mp->elt_va_start), - (IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE)); + mr = mlx5_mp2mr(txq->priv->pd, mp); if (unlikely(mr == NULL)) { DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.", (void *)txq); diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index b239ebfba8..e85cf93bf6 100644 --- a/drivers/net/mlx5/mlx5_rxtx.h +++ b/drivers/net/mlx5/mlx5_rxtx.h @@ -317,6 +317,7 @@ void mlx5_tx_queue_release(void *); /* mlx5_rxtx.c */ +struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, const struct rte_mempool *); void txq_mp2mr_iter(const struct rte_mempool *, void *); uint16_t mlx5_tx_burst(void *, struct rte_mbuf **, uint16_t); uint16_t mlx5_rx_burst_sp(void *, struct rte_mbuf **, uint16_t);