net/mlx5: support 32-bit systems

This patch adds support for building and running mlx5 PMD on
32bit systems such as i686.

The main issue to tackle was handling 32-bit access to the UAR.
As quoted from the mlx5 PRM:
QP and CQ DoorBells require 64-bit writes. For best performance, it
is recommended to execute the QP/CQ DoorBell as a single 64-bit write
operation. For platforms that do not support 64 bit writes, it is
possible to issue the 64 bits DoorBells through two consecutive writes,
each write 32 bits, as described below:
* The order of writing each of the Dwords is from lower to upper
  addresses.
* No other DoorBell can be rung (or even start ringing) in the midst
 of an on-going write of a DoorBell over a given UAR page.

The last rule implies that in a multi-threaded environment, the access
to a UAR page (which can be accessible by all threads in the process)
must be synchronized (for example, using a semaphore) unless an atomic
write of 64 bits in a single bus operation is guaranteed. Such a
synchronization is not required when ringing DoorBells on different
UAR pages.

Signed-off-by: Moti Haimovsky <motih@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
This commit is contained in:
Moti Haimovsky 2018-07-12 15:01:31 +03:00 committed by Shahaf Shuler
parent 06b1fe3f6d
commit 6bf10ab69b
9 changed files with 131 additions and 17 deletions

View File

@ -43,5 +43,6 @@ Multiprocess aware = Y
Other kdrv = Y
ARMv8 = Y
Power8 = Y
x86-32 = Y
x86-64 = Y
Usage doc = Y

View File

@ -49,7 +49,7 @@ libibverbs.
Features
--------
- Multi arch support: x86_64, POWER8, ARMv8.
- Multi arch support: x86_64, POWER8, ARMv8, i686.
- Multiple TX and RX queues.
- Support for scattered TX and RX frames.
- IPv4, IPv6, TCPv4, TCPv6, UDPv4 and UDPv6 RSS on any number of queues.
@ -489,6 +489,10 @@ RMDA Core with Linux Kernel
- Minimal kernel version : v4.14 or the most recent 4.14-rc (see `Linux installation documentation`_)
- Minimal rdma-core version: v15+ commit 0c5f5765213a ("Merge pull request #227 from yishaih/tm")
(see `RDMA Core installation documentation`_)
- When building for i686 use:
- rdma-core version 18.0 or above built with 32bit support.
- Kernel version 4.14.41 or above.
.. _`Linux installation documentation`: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/plain/Documentation/admin-guide/README.rst
.. _`RDMA Core installation documentation`: https://raw.githubusercontent.com/linux-rdma/rdma-core/master/README.md

View File

@ -598,7 +598,7 @@ mlx5_uar_init_primary(struct rte_eth_dev *dev)
rte_memseg_walk(find_lower_va_bound, &addr);
/* keep distance to hugepages to minimize potential conflicts. */
addr = RTE_PTR_SUB(addr, MLX5_UAR_OFFSET + MLX5_UAR_SIZE);
addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX5_UAR_OFFSET + MLX5_UAR_SIZE));
/* anonymous mmap, no real memory consumption. */
addr = mmap(addr, MLX5_UAR_SIZE,
PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
@ -939,6 +939,12 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
priv->device_attr = attr;
priv->pd = pd;
priv->mtu = ETHER_MTU;
#ifndef RTE_ARCH_64
/* Initialize UAR access locks for 32bit implementations. */
rte_spinlock_init(&priv->uar_lock_cq);
for (i = 0; i < MLX5_UAR_PAGE_NUM_MAX; i++)
rte_spinlock_init(&priv->uar_lock[i]);
#endif
/* Some internal functions rely on Netlink sockets, open them now. */
priv->nl_socket_rdma = mlx5_nl_init(0, NETLINK_RDMA);
priv->nl_socket_route = mlx5_nl_init(RTMGRP_LINK, NETLINK_ROUTE);

View File

@ -215,6 +215,11 @@ struct priv {
int nl_socket_rdma; /* Netlink socket (NETLINK_RDMA). */
int nl_socket_route; /* Netlink socket (NETLINK_ROUTE). */
uint32_t nl_sn; /* Netlink message sequence number. */
#ifndef RTE_ARCH_64
rte_spinlock_t uar_lock_cq; /* CQs share a common distinct UAR */
rte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX];
/* UAR same-page access control required in 32bit implementations. */
#endif
};
#define PORT_ID(priv) ((priv)->dev_data->port_id)

View File

@ -87,14 +87,28 @@
#define MLX5_LINK_STATUS_TIMEOUT 10
/* Reserved address space for UAR mapping. */
#define MLX5_UAR_SIZE (1ULL << 32)
#define MLX5_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4))
/* Offset of reserved UAR address space to hugepage memory. Offset is used here
* to minimize possibility of address next to hugepage being used by other code
* in either primary or secondary process, failing to map TX UAR would make TX
* packets invisible to HW.
*/
#define MLX5_UAR_OFFSET (1ULL << 32)
#define MLX5_UAR_OFFSET (1ULL << (sizeof(uintptr_t) * 4))
/*
 * Maximum number of UAR pages used by a port, and the matching index mask.
 * These are the size and mask for the per-port array of spinlocks used to
 * synchronize the access to the port's UARs on platforms that do not support
 * 64 bit writes.
 * In such systems it is possible to issue the 64 bits DoorBells through two
 * consecutive writes, each write 32 bits. The access to a UAR page (which can
 * be accessible by all threads in the process) must be synchronized
 * (for example, using a semaphore). Such a synchronization is not required
 * when ringing DoorBells on different UAR pages.
 * A port with 512 Tx queues uses 8 UAR pages of 4 kBytes each, which are
 * shared among the ports.
 *
 * The mask is derived as (MAX - 1) and is applied with a bitwise AND to the
 * UAR page number to pick a lock, so MLX5_UAR_PAGE_NUM_MAX must stay a power
 * of two. Page numbers that differ only above the mask select the same lock.
 */
#define MLX5_UAR_PAGE_NUM_MAX 64
#define MLX5_UAR_PAGE_NUM_MASK ((MLX5_UAR_PAGE_NUM_MAX) - 1)
/* Log 2 of the default number of strides per WQE for Multi-Packet RQ. */
#define MLX5_MPRQ_STRIDE_NUM_N 6U

View File

@ -647,7 +647,8 @@ mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
doorbell = (uint64_t)doorbell_hi << 32;
doorbell |= rxq->cqn;
rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
rte_write64(rte_cpu_to_be_64(doorbell), cq_db_reg);
mlx5_uar_write64(rte_cpu_to_be_64(doorbell),
cq_db_reg, rxq->uar_lock_cq);
}
/**
@ -1449,6 +1450,9 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
tmpl->rxq.elts_n = log2above(desc);
tmpl->rxq.elts =
(struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
#ifndef RTE_ARCH_64
tmpl->rxq.uar_lock_cq = &priv->uar_lock_cq;
#endif
tmpl->idx = idx;
rte_atomic32_inc(&tmpl->refcnt);
LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);

View File

@ -495,6 +495,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
volatile struct mlx5_wqe_ctrl *last_wqe = NULL;
unsigned int segs_n = 0;
const unsigned int max_inline = txq->max_inline;
uint64_t addr_64;
if (unlikely(!pkts_n))
return 0;
@ -711,12 +712,12 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
ds = 3;
use_dseg:
/* Add the remaining packet as a simple ds. */
addr = rte_cpu_to_be_64(addr);
addr_64 = rte_cpu_to_be_64(addr);
*dseg = (rte_v128u32_t){
rte_cpu_to_be_32(length),
mlx5_tx_mb2mr(txq, buf),
addr,
addr >> 32,
addr_64,
addr_64 >> 32,
};
++ds;
if (!segs_n)
@ -750,12 +751,12 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
total_length += length;
#endif
/* Store segment information. */
addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
*dseg = (rte_v128u32_t){
rte_cpu_to_be_32(length),
mlx5_tx_mb2mr(txq, buf),
addr,
addr >> 32,
addr_64,
addr_64 >> 32,
};
(*txq->elts)[++elts_head & elts_m] = buf;
if (--segs_n)
@ -1450,6 +1451,7 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
unsigned int mpw_room = 0;
unsigned int inl_pad = 0;
uint32_t inl_hdr;
uint64_t addr_64;
struct mlx5_mpw mpw = {
.state = MLX5_MPW_STATE_CLOSED,
};
@ -1586,13 +1588,13 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
((uintptr_t)mpw.data.raw +
inl_pad);
(*txq->elts)[elts_head++ & elts_m] = buf;
addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
uintptr_t));
addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
uintptr_t));
*dseg = (rte_v128u32_t) {
rte_cpu_to_be_32(length),
mlx5_tx_mb2mr(txq, buf),
addr,
addr >> 32,
addr_64,
addr_64 >> 32,
};
mpw.data.raw = (volatile void *)(dseg + 1);
mpw.total_len += (inl_pad + sizeof(*dseg));

View File

@ -26,6 +26,8 @@
#include <rte_common.h>
#include <rte_hexdump.h>
#include <rte_atomic.h>
#include <rte_spinlock.h>
#include <rte_io.h>
#include "mlx5_utils.h"
#include "mlx5.h"
@ -118,6 +120,10 @@ struct mlx5_rxq_data {
void *cq_uar; /* CQ user access region. */
uint32_t cqn; /* CQ number. */
uint8_t cq_arm_sn; /* CQ arm seq number. */
#ifndef RTE_ARCH_64
rte_spinlock_t *uar_lock_cq;
/* CQ (UAR) access lock required for 32bit implementations */
#endif
uint32_t tunnel; /* Tunnel information. */
} __rte_cache_aligned;
@ -198,6 +204,10 @@ struct mlx5_txq_data {
volatile void *bf_reg; /* Blueflame register remapped. */
struct rte_mbuf *(*elts)[]; /* TX elements. */
struct mlx5_txq_stats stats; /* TX queue counters. */
#ifndef RTE_ARCH_64
rte_spinlock_t *uar_lock;
/* UAR access lock required for 32bit implementations */
#endif
} __rte_cache_aligned;
/* Verbs Rx queue elements. */
@ -353,6 +363,63 @@ void mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl);
uint32_t mlx5_rx_addr2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr);
uint32_t mlx5_tx_addr2mr_bh(struct mlx5_txq_data *txq, uintptr_t addr);
/**
 * Store a 64-bit value into the mlx5 UAR region without any leading barrier.
 *
 * On 64-bit builds this is a single relaxed 64-bit store and the lock is not
 * used. On other builds the value is written as two 32-bit stores, lower
 * half at @p addr first and upper half at @p addr + 4 second, separated by
 * an I/O write barrier and performed while holding @p lock.
 *
 * @param val
 *   Value to store; it is written as given, no byte swapping is done here.
 * @param addr
 *   Destination address inside the UAR.
 * @param lock
 *   Spinlock guarding this UAR access (only taken on non 64-bit builds).
 */
static __rte_always_inline void
__mlx5_uar_write64_relaxed(uint64_t val, volatile void *addr,
			   rte_spinlock_t *lock __rte_unused)
{
#ifndef RTE_ARCH_64
	/* Split the doorbell into the two dwords the device expects. */
	const uint32_t lower_dword = (uint32_t)val;
	const uint32_t upper_dword = (uint32_t)(val >> 32);
	volatile uint8_t *upper_addr =
		(volatile uint8_t *)addr + sizeof(uint32_t);

	rte_spinlock_lock(lock);
	/* Lower address first, then the upper one, in that order. */
	rte_write32_relaxed(lower_dword, addr);
	rte_io_wmb();
	rte_write32_relaxed(upper_dword, (volatile void *)upper_addr);
	rte_spinlock_unlock(lock);
#else /* RTE_ARCH_64 */
	rte_write64_relaxed(val, addr);
#endif
}
/**
 * Ordered 64-bit store to the mlx5 UAR region for both 32bit and 64bit
 * architectures.
 *
 * Issues an I/O write barrier (rte_io_wmb()) and then performs the store
 * through __mlx5_uar_write64_relaxed(), so the barrier always precedes the
 * UAR write.
 *
 * @param val
 *   64-bit value to write. It is forwarded unchanged; the caller visible in
 *   this file (mlx5_arm_cq) passes a value already converted with
 *   rte_cpu_to_be_64().
 * @param addr
 *   Address to write to.
 * @param lock
 *   Address of the lock to use for that UAR access; forwarded as-is to
 *   __mlx5_uar_write64_relaxed().
 */
static __rte_always_inline void
__mlx5_uar_write64(uint64_t val, volatile void *addr, rte_spinlock_t *lock)
{
/* The barrier must come before the store; do not reorder these two calls. */
rte_io_wmb();
__mlx5_uar_write64_relaxed(val, addr, lock);
}
/*
 * Assist macros, used instead of directly calling the functions they wrap.
 *
 * On 64-bit builds the `lock` argument is dropped by the preprocessor and
 * NULL is passed instead. This lets call sites name lock fields such as
 * rxq->uar_lock_cq or txq->uar_lock, which are only declared when
 * RTE_ARCH_64 is not defined, without breaking the 64-bit build.
 * On other builds the lock argument is forwarded unchanged.
 */
#ifdef RTE_ARCH_64
#define mlx5_uar_write64_relaxed(val, dst, lock) \
__mlx5_uar_write64_relaxed(val, dst, NULL)
#define mlx5_uar_write64(val, dst, lock) __mlx5_uar_write64(val, dst, NULL)
#else
#define mlx5_uar_write64_relaxed(val, dst, lock) \
__mlx5_uar_write64_relaxed(val, dst, lock)
#define mlx5_uar_write64(val, dst, lock) __mlx5_uar_write64(val, dst, lock)
#endif
#ifndef NDEBUG
/**
* Verify or set magic value in CQE.
@ -619,7 +686,7 @@ mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
*txq->qp_db = rte_cpu_to_be_32(txq->wqe_ci);
/* Ensure ordering between DB record and BF copy. */
rte_wmb();
*dst = *src;
mlx5_uar_write64_relaxed(*src, dst, txq->uar_lock);
if (cond)
rte_wmb();
}

View File

@ -255,6 +255,9 @@ mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
struct mlx5_txq_ctrl *txq_ctrl;
int already_mapped;
size_t page_size = sysconf(_SC_PAGESIZE);
#ifndef RTE_ARCH_64
unsigned int lock_idx;
#endif
memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
/*
@ -281,7 +284,7 @@ mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
}
/* new address in reserved UAR address space. */
addr = RTE_PTR_ADD(priv->uar_base,
uar_va & (MLX5_UAR_SIZE - 1));
uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1));
if (!already_mapped) {
pages[pages_n++] = uar_va;
/* fixed mmap to specified address in reserved
@ -305,6 +308,12 @@ mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
else
assert(txq_ctrl->txq.bf_reg ==
RTE_PTR_ADD((void *)addr, off));
#ifndef RTE_ARCH_64
/* Assign a UAR lock according to UAR page number */
lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
MLX5_UAR_PAGE_NUM_MASK;
txq->uar_lock = &priv->uar_lock[lock_idx];
#endif
}
return 0;
}
@ -511,6 +520,8 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
rte_atomic32_inc(&txq_ibv->refcnt);
if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
DRV_LOG(DEBUG, "port %u: uar_mmap_offset 0x%lx",
dev->data->port_id, txq_ctrl->uar_mmap_offset);
} else {
DRV_LOG(ERR,
"port %u failed to retrieve UAR info, invalid"