numam-dpdk/drivers/net/mlx5/mlx5_rxtx_vec.h

/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#ifndef RTE_PMD_MLX5_RXTX_VEC_H_
#define RTE_PMD_MLX5_RXTX_VEC_H_

#include <rte_common.h>
#include <rte_mbuf.h>

#include "mlx5_autoconf.h"
#include "mlx5_prm.h"

/*
 * Checksum-related Tx offload flags handled in HW by the vectorized Tx
 * path: outer IPv4, IPv4, TCP and UDP checksums.
 */
#define MLX5_VEC_TX_CKSUM_OFFLOAD_CAP ( \
	DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | \
	DEV_TX_OFFLOAD_IPV4_CKSUM | \
	DEV_TX_OFFLOAD_TCP_CKSUM | \
	DEV_TX_OFFLOAD_UDP_CKSUM)

/*
 * Complete set of Tx offload flags handled by the vectorized Tx path:
 * multi-segment packets plus every checksum offload listed above.
 */
#define MLX5_VEC_TX_OFFLOAD_CAP ( \
	DEV_TX_OFFLOAD_MULTI_SEGS | \
	MLX5_VEC_TX_CKSUM_OFFLOAD_CAP)

/*
 * Compile time sanity check for vectorized functions.
 *
 * Every assertion below pins the offset of a struct rte_mbuf or
 * struct mlx5_cqe field, either relative to another field of the same
 * structure or to a fixed value. The groups are labelled with the
 * vectorized routine that depends on that layout, so rearranging either
 * structure breaks the build here.
 *
 * NOTE(review): static_assert() is presumably supplied by <assert.h>, which
 * this header does not include itself - confirm that every includer pulls
 * it in first.
 */

/* Fails the build with an rte_mbuf-specific message when @p s is false. */
#define S_ASSERT_RTE_MBUF(s) \
	static_assert(s, "A field of struct rte_mbuf is changed")
/* Fails the build with an mlx5_cqe-specific message when @p s is false. */
#define S_ASSERT_MLX5_CQE(s) \
	static_assert(s, "A field of struct mlx5_cqe is changed")

/*
 * rxq_cq_decompress_v():
 * pkt_len, data_len and hash must sit 4, 8 and 12 bytes after the start of
 * rx_descriptor_fields1.
 */
S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, pkt_len) ==
		  offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, data_len) ==
		  offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, hash) ==
		  offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);

/*
 * rxq_cq_to_ptype_oflags_v():
 * ol_flags must sit 8 bytes after rearm_data, and the offset of rearm_data
 * must be unchanged by RTE_ALIGN(..., 16) (presumably meaning it is already
 * 16-byte aligned).
 */
S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, ol_flags) ==
		  offsetof(struct rte_mbuf, rearm_data) + 8);
S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, rearm_data) ==
		  RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16));

/*
 * rxq_burst_v():
 * repeats the pkt_len/data_len checks from rxq_cq_decompress_v() and adds
 * the CQE layout requirements.
 */
S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, pkt_len) ==
		  offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, data_len) ==
		  offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
/*
 * pkt_info is expected at byte 64 of the CQE when the cache line size is
 * 128 bytes, and at byte 0 otherwise.
 */
#if (RTE_CACHE_LINE_SIZE == 128)
S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, pkt_info) == 64);
#else
S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, pkt_info) == 0);
#endif
/* rx_hash_res sits 12 bytes after pkt_info. */
S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, rx_hash_res) ==
		  offsetof(struct mlx5_cqe, pkt_info) + 12);
/* rsvd1 ends exactly where hdr_type_etc begins (no gap in between). */
S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, rsvd1) +
		  sizeof(((struct mlx5_cqe *)0)->rsvd1) ==
		  offsetof(struct mlx5_cqe, hdr_type_etc));
/* vlan_info sits 2 bytes after hdr_type_etc. */
S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, vlan_info) ==
		  offsetof(struct mlx5_cqe, hdr_type_etc) + 2);
/* rsvd2 ends exactly where byte_cnt begins (no gap in between). */
S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, rsvd2) +
		  sizeof(((struct mlx5_cqe *)0)->rsvd2) ==
		  offsetof(struct mlx5_cqe, byte_cnt));
/*
 * The offset of sop_drop_qpn must be unchanged by RTE_ALIGN(..., 8)
 * (presumably meaning it is 8-byte aligned), and op_own must be the byte
 * 7 past it.
 */
S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, sop_drop_qpn) ==
		  RTE_ALIGN(offsetof(struct mlx5_cqe, sop_drop_qpn), 8));
S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, op_own) ==
		  offsetof(struct mlx5_cqe, sop_drop_qpn) + 7);

/**
 * Refill the Rx ring with mbufs taken from the queue's mempool in one bulk
 * request.
 *
 * The requested count is reduced by MLX5_VPMD_DESCS_PER_LOOP and clamped so
 * the refill stops at the end of the ring. When the mempool cannot satisfy
 * the bulk request, only the rx_nombuf counter is increased and the function
 * returns without touching the ring or the doorbell record.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param n
 *   Number of buffers to be replenished.
 */
static inline void
mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq, uint16_t n)
{
	const uint16_t ring_size = 1 << rxq->elts_n;
	const uint16_t ring_mask = ring_size - 1;
	uint16_t head = rxq->rq_ci & ring_mask;
	struct rte_mbuf **slots = &(*rxq->elts)[head];
	volatile struct mlx5_wqe_data_seg *descs =
		&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[head];
	struct rte_mbuf **guard;
	unsigned int k;

	assert(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH);
	assert(n <= (uint16_t)(ring_size - (rxq->rq_ci - rxq->rq_pi)));
	assert(MLX5_VPMD_RXQ_RPLNSH_THRESH > MLX5_VPMD_DESCS_PER_LOOP);
	/* Stop at the end of the ring instead of wrapping around. */
	n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, ring_size - head);
	if (rte_mempool_get_bulk(rxq->mp, (void *)slots, n) < 0) {
		/* Whole batch unavailable: account for it and give up. */
		rxq->stats.rx_nombuf += n;
		return;
	}
	for (k = 0; k != n; ++k) {
		struct rte_mbuf *mbuf = slots[k];
		uintptr_t data = (uintptr_t)mbuf->buf_addr +
				 RTE_PKTMBUF_HEADROOM;

		descs[k].addr = rte_cpu_to_be_64(data);
		/*
		 * The LKey already stored in the WQE is kept as long as the
		 * queue's MR cache holds a single entry.
		 */
		if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
			descs[k].lkey = mlx5_rx_mb2mr(rxq, mbuf);
	}
	rxq->rq_ci += n;
	/*
	 * Point the slots following the new consumer index at the fake mbuf
	 * to prevent overflowing into consumed mbufs.
	 */
	head = rxq->rq_ci & ring_mask;
	guard = &(*rxq->elts)[head];
	for (k = 0; k < MLX5_VPMD_DESCS_PER_LOOP; ++k)
		guard[k] = &rxq->fake_mbuf;
	/* Write barrier ahead of the doorbell record update. */
	rte_cio_wmb();
	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
}

#endif /* RTE_PMD_MLX5_RXTX_VEC_H_ */
net/mlx5: use SPDX tags in 6WIND copyrighted files Signed-off-by: Olivier Matz <olivier.matz@6wind.com> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> Acked-by: Bruce Richardson <bruce.richardson@intel.com> Acked-by: Thomas Monjalon <thomas@monjalon.net> 2018-01-29 13:11:30 +00:00			`/* SPDX-License-Identifier: BSD-3-Clause`
			`* Copyright 2017 6WIND S.A.`
align SPDX Mellanox copyrights Aligning Mellanox SPDX copyrights to a single format. In addition replace to SPDX licence files which were missed. Signed-off-by: Shahaf Shuler <shahafs@mellanox.com> Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com> 2018-03-20 19:20:35 +00:00			`* Copyright 2017 Mellanox Technologies, Ltd`
net/mlx5: use static assert for compile-time sanity checks Replace compile-time sanity check with static_assert() as c11 standard has been set. Add mlx5_rxtx_vec.h and move the sanity checks to the file Signed-off-by: Yongseok Koh <yskoh@mellanox.com> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> 2017-10-09 18:46:56 +00:00			`*/`

			`#ifndef RTE_PMD_MLX5_RXTX_VEC_H_`
			`#define RTE_PMD_MLX5_RXTX_VEC_H_`

			`#include <rte_common.h>`
			`#include <rte_mbuf.h>`

			`#include "mlx5_autoconf.h"`
			`#include "mlx5_prm.h"`

net/mlx5: convert to new Tx offloads API Ethdev Tx offloads API has changed since: commit cba7f53b717d ("ethdev: introduce Tx queue offloads API") This commit support the new Tx offloads API. Signed-off-by: Shahaf Shuler <shahafs@mellanox.com> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> 2018-01-10 09:17:00 +00:00			`/* HW checksum offload capabilities of vectorized Tx. */`
			`#define MLX5_VEC_TX_CKSUM_OFFLOAD_CAP \`
			`(DEV_TX_OFFLOAD_IPV4_CKSUM \| \`
			`DEV_TX_OFFLOAD_UDP_CKSUM \| \`
			`DEV_TX_OFFLOAD_TCP_CKSUM \| \`
			`DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)`

			`/* HW offload capabilities of vectorized Tx. */`
			`#define MLX5_VEC_TX_OFFLOAD_CAP \`
			`(MLX5_VEC_TX_CKSUM_OFFLOAD_CAP \| \`
			`DEV_TX_OFFLOAD_MULTI_SEGS)`

net/mlx5: use static assert for compile-time sanity checks Replace compile-time sanity check with static_assert() as c11 standard has been set. Add mlx5_rxtx_vec.h and move the sanity checks to the file Signed-off-by: Yongseok Koh <yskoh@mellanox.com> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> 2017-10-09 18:46:56 +00:00			`/*`
			`* Compile time sanity check for vectorized functions.`
			`*/`

			`#define S_ASSERT_RTE_MBUF(s) \`
			`static_assert(s, "A field of struct rte_mbuf is changed")`
			`#define S_ASSERT_MLX5_CQE(s) \`
			`static_assert(s, "A field of struct mlx5_cqe is changed")`

			`/* rxq_cq_decompress_v() */`
			`S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, pkt_len) ==`
			`offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);`
			`S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, data_len) ==`
			`offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);`
			`S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, hash) ==`
			`offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);`

			`/* rxq_cq_to_ptype_oflags_v() */`
			`S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, ol_flags) ==`
			`offsetof(struct rte_mbuf, rearm_data) + 8);`
			`S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, rearm_data) ==`
			`RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16));`

			`/* rxq_burst_v() */`
			`S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, pkt_len) ==`
			`offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);`
			`S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, data_len) ==`
			`offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);`
net/mlx5: add vectorized Rx/Tx burst for ARM Brings vectorization through NEON instructions. Signed-off-by: Yongseok Koh <yskoh@mellanox.com> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> 2017-10-09 18:47:00 +00:00			`#if (RTE_CACHE_LINE_SIZE == 128)`
			`S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, pkt_info) == 64);`
			`#else`
net/mlx5: use static assert for compile-time sanity checks Replace compile-time sanity check with static_assert() as c11 standard has been set. Add mlx5_rxtx_vec.h and move the sanity checks to the file Signed-off-by: Yongseok Koh <yskoh@mellanox.com> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> 2017-10-09 18:46:56 +00:00			`S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, pkt_info) == 0);`
net/mlx5: add vectorized Rx/Tx burst for ARM Brings vectorization through NEON instructions. Signed-off-by: Yongseok Koh <yskoh@mellanox.com> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> 2017-10-09 18:47:00 +00:00			`#endif`
net/mlx5: use static assert for compile-time sanity checks Replace compile-time sanity check with static_assert() as c11 standard has been set. Add mlx5_rxtx_vec.h and move the sanity checks to the file Signed-off-by: Yongseok Koh <yskoh@mellanox.com> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> 2017-10-09 18:46:56 +00:00			`S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, rx_hash_res) ==`
			`offsetof(struct mlx5_cqe, pkt_info) + 12);`
			`S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, rsvd1) +`
			`sizeof(((struct mlx5_cqe *)0)->rsvd1) ==`
			`offsetof(struct mlx5_cqe, hdr_type_etc));`
			`S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, vlan_info) ==`
			`offsetof(struct mlx5_cqe, hdr_type_etc) + 2);`
			`S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, rsvd2) +`
			`sizeof(((struct mlx5_cqe *)0)->rsvd2) ==`
			`offsetof(struct mlx5_cqe, byte_cnt));`
			`S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, sop_drop_qpn) ==`
			`RTE_ALIGN(offsetof(struct mlx5_cqe, sop_drop_qpn), 8));`
			`S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, op_own) ==`
			`offsetof(struct mlx5_cqe, sop_drop_qpn) + 7);`

net/mlx5: separate shareable vector functions Considering more architecture (e.g. ARM and PowerPC) will be added for vectorized Rx/Tx burst, all the shareable functions which don't use any vector intrinsics need to be separated from architecture-dependent functions. All the vector functions for x86 SSE are moved to a new header file - mlx5_rxtx_vec_sse.h. And shareable common functions are now in mlx5_rxtx_vec.c. Signed-off-by: Yongseok Koh <yskoh@mellanox.com> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> 2017-10-09 18:46:57 +00:00			`/**`
			`* Replenish buffers for RX in bulk.`
			`*`
			`* @param rxq`
			`* Pointer to RX queue structure.`
			`* @param n`
			`* Number of buffers to be replenished.`
			`*/`
			`static inline void`
			`mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq, uint16_t n)`
			`{`
			`const uint16_t q_n = 1 << rxq->elts_n;`
			`const uint16_t q_mask = q_n - 1;`
net/mlx5: fix deadlock due to buffered slots in Rx SW ring When replenishing Rx ring, there're always buffered slots reserved between consumed entries and HW owned entries. These have to be filled with fake mbufs to protect from possible overflow rather than optimistically expecting successful replenishment which can cause deadlock with small-sized queue. Fixes: fc048bd52cb7 ("net/mlx5: fix overflow of Rx SW ring") Cc: stable@dpdk.org Reported-by: Martin Weiser <martin.weiser@allegro-packets.com> Signed-off-by: Yongseok Koh <yskoh@mellanox.com> Tested-by: Martin Weiser <martin.weiser@allegro-packets.com> 2017-10-10 14:04:02 +00:00			`uint16_t elts_idx = rxq->rq_ci & q_mask;`
net/mlx5: separate shareable vector functions Considering more architecture (e.g. ARM and PowerPC) will be added for vectorized Rx/Tx burst, all the shareable functions which don't use any vector intrinsics need to be separated from architecture-dependent functions. All the vector functions for x86 SSE are moved to a new header file - mlx5_rxtx_vec_sse.h. And shareable common functions are now in mlx5_rxtx_vec.c. Signed-off-by: Yongseok Koh <yskoh@mellanox.com> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> 2017-10-09 18:46:57 +00:00			`struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];`
net/mlx5: add Multi-Packet Rx support Multi-Packet Rx Queue (MPRQ a.k.a Striding RQ) can further save PCIe bandwidth by posting a single large buffer for multiple packets. Instead of posting a buffer per a packet, one large buffer is posted in order to receive multiple packets on the buffer. A MPRQ buffer consists of multiple fixed-size strides and each stride receives one packet. Rx packet is mem-copied to a user-provided mbuf if the size of Rx packet is comparatively small, or PMD attaches the Rx packet to the mbuf by external buffer attachment - rte_pktmbuf_attach_extbuf(). A mempool for external buffers will be allocated and managed by PMD. Signed-off-by: Yongseok Koh <yskoh@mellanox.com> Acked-by: Shahaf Shuler <shahafs@mellanox.com> 2018-05-09 11:13:50 +00:00			`volatile struct mlx5_wqe_data_seg *wq =`
			`&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];`
net/mlx5: separate shareable vector functions Considering more architecture (e.g. ARM and PowerPC) will be added for vectorized Rx/Tx burst, all the shareable functions which don't use any vector intrinsics need to be separated from architecture-dependent functions. All the vector functions for x86 SSE are moved to a new header file - mlx5_rxtx_vec_sse.h. And shareable common functions are now in mlx5_rxtx_vec.c. Signed-off-by: Yongseok Koh <yskoh@mellanox.com> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> 2017-10-09 18:46:57 +00:00			`unsigned int i;`

			`assert(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH);`
			`assert(n <= (uint16_t)(q_n - (rxq->rq_ci - rxq->rq_pi)));`
			`assert(MLX5_VPMD_RXQ_RPLNSH_THRESH > MLX5_VPMD_DESCS_PER_LOOP);`
			`/* Not to cross queue end. */`
			`n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);`
			`if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {`
			`rxq->stats.rx_nombuf += n;`
			`return;`
			`}`
net/mlx5: add new memory region support This is the new design of Memory Region (MR) for mlx PMD, in order to: - Accommodate the new memory hotplug model. - Support non-contiguous Mempool. There are multiple layers for MR search. L0 is to look up the last-hit entry which is pointed by mr_ctrl->mru (Most Recently Used). If L0 misses, L1 is to look up the address in a fixed-sized array by linear search. L0/L1 is in an inline function - mlx5_mr_lookup_cache(). If L1 misses, the bottom-half function is called to look up the address from the bigger local cache of the queue. This is L2 - mlx5_mr_addr2mr_bh() and it is not an inline function. Data structure for L2 is the Binary Tree. If L2 misses, the search falls into the slowest path which takes locks in order to access global device cache (priv->mr.cache) which is also a B-tree and caches the original MR list (priv->mr.mr_list) of the device. Unless the global cache is overflowed, it is all-inclusive of the MR list. This is L3 - mlx5_mr_lookup_dev(). The size of the L3 cache table is limited and can't be expanded on the fly due to deadlock. Refer to the comments in the code for the details - mr_lookup_dev(). If L3 is overflowed, the list will have to be searched directly bypassing the cache although it is slower. If L3 misses, a new MR for the address should be created - mlx5_mr_create(). When it creates a new MR, it tries to register adjacent memsegs as much as possible which are virtually contiguous around the address. This must take two locks - memory_hotplug_lock and priv->mr.rwlock. Due to memory_hotplug_lock, there can't be any allocation/free of memory inside. In the free callback of the memory hotplug event, freed space is searched from the MR list and corresponding bits are cleared from the bitmap of MRs. This can fragment a MR and the MR will have multiple search entries in the caches. Once there's a change by the event, the global cache must be rebuilt and all the per-queue caches will be flushed as well. 
If memory is frequently freed in run-time, that may cause jitter on dataplane processing in the worst case by incurring MR cache flush and rebuild. But, it would be the least probable scenario. To guarantee the most optimal performance, it is highly recommended to use an EAL option - '--socket-mem'. Then, the reserved memory will be pinned and won't be freed dynamically. And it is also recommended to configure per-lcore cache of Mempool. Even though there're many MRs for a device or MRs are highly fragmented, the cache of Mempool will be much helpful to reduce misses on per-queue caches anyway. '--legacy-mem' is also supported. Signed-off-by: Yongseok Koh <yskoh@mellanox.com> 2018-05-09 11:09:04 +00:00			`for (i = 0; i < n; ++i) {`
net/mlx5: separate shareable vector functions Considering more architecture (e.g. ARM and PowerPC) will be added for vectorized Rx/Tx burst, all the shareable functions which don't use any vector intrinsics need to be separated from architecture-dependent functions. All the vector functions for x86 SSE are moved to a new header file - mlx5_rxtx_vec_sse.h. And shareable common functions are now in mlx5_rxtx_vec.c. Signed-off-by: Yongseok Koh <yskoh@mellanox.com> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> 2017-10-09 18:46:57 +00:00			`wq[i].addr = rte_cpu_to_be_64((uintptr_t)elts[i]->buf_addr +`
			`RTE_PKTMBUF_HEADROOM);`
net/mlx5: add new memory region support This is the new design of Memory Region (MR) for mlx PMD, in order to: - Accommodate the new memory hotplug model. - Support non-contiguous Mempool. There are multiple layers for MR search. L0 is to look up the last-hit entry which is pointed by mr_ctrl->mru (Most Recently Used). If L0 misses, L1 is to look up the address in a fixed-sized array by linear search. L0/L1 is in an inline function - mlx5_mr_lookup_cache(). If L1 misses, the bottom-half function is called to look up the address from the bigger local cache of the queue. This is L2 - mlx5_mr_addr2mr_bh() and it is not an inline function. Data structure for L2 is the Binary Tree. If L2 misses, the search falls into the slowest path which takes locks in order to access global device cache (priv->mr.cache) which is also a B-tree and caches the original MR list (priv->mr.mr_list) of the device. Unless the global cache is overflowed, it is all-inclusive of the MR list. This is L3 - mlx5_mr_lookup_dev(). The size of the L3 cache table is limited and can't be expanded on the fly due to deadlock. Refer to the comments in the code for the details - mr_lookup_dev(). If L3 is overflowed, the list will have to be searched directly bypassing the cache although it is slower. If L3 misses, a new MR for the address should be created - mlx5_mr_create(). When it creates a new MR, it tries to register adjacent memsegs as much as possible which are virtually contiguous around the address. This must take two locks - memory_hotplug_lock and priv->mr.rwlock. Due to memory_hotplug_lock, there can't be any allocation/free of memory inside. In the free callback of the memory hotplug event, freed space is searched from the MR list and corresponding bits are cleared from the bitmap of MRs. This can fragment a MR and the MR will have multiple search entries in the caches. Once there's a change by the event, the global cache must be rebuilt and all the per-queue caches will be flushed as well. 
If memory is frequently freed in run-time, that may cause jitter on dataplane processing in the worst case by incurring MR cache flush and rebuild. But, it would be the least probable scenario. To guarantee the most optimal performance, it is highly recommended to use an EAL option - '--socket-mem'. Then, the reserved memory will be pinned and won't be freed dynamically. And it is also recommended to configure per-lcore cache of Mempool. Even though there're many MRs for a device or MRs are highly fragmented, the cache of Mempool will be much helpful to reduce misses on per-queue caches anyway. '--legacy-mem' is also supported. Signed-off-by: Yongseok Koh <yskoh@mellanox.com> 2018-05-09 11:09:04 +00:00			`/* If there's only one MR, no need to replace LKey in WQE. */`
			`if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))`
			`wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);`
			`}`
net/mlx5: separate shareable vector functions Considering more architecture (e.g. ARM and PowerPC) will be added for vectorized Rx/Tx burst, all the shareable functions which don't use any vector intrinsics need to be separated from architecture-dependent functions. All the vector functions for x86 SSE are moved to a new header file - mlx5_rxtx_vec_sse.h. And shareable common functions are now in mlx5_rxtx_vec.c. Signed-off-by: Yongseok Koh <yskoh@mellanox.com> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> 2017-10-09 18:46:57 +00:00			`rxq->rq_ci += n;`
net/mlx5: fix deadlock due to buffered slots in Rx SW ring When replenishing Rx ring, there're always buffered slots reserved between consumed entries and HW owned entries. These have to be filled with fake mbufs to protect from possible overflow rather than optimistically expecting successful replenishment which can cause deadlock with small-sized queue. Fixes: fc048bd52cb7 ("net/mlx5: fix overflow of Rx SW ring") Cc: stable@dpdk.org Reported-by: Martin Weiser <martin.weiser@allegro-packets.com> Signed-off-by: Yongseok Koh <yskoh@mellanox.com> Tested-by: Martin Weiser <martin.weiser@allegro-packets.com> 2017-10-10 14:04:02 +00:00			`/* Prevent overflowing into consumed mbufs. */`
			`elts_idx = rxq->rq_ci & q_mask;`
			`for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)`
			`(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;`
net/mlx5: replace I/O memory barrier with coherent version Signed-off-by: Yongseok Koh <yskoh@mellanox.com> 2018-01-25 21:02:49 +00:00			`rte_cio_wmb();`
net/mlx5: separate shareable vector functions Considering more architecture (e.g. ARM and PowerPC) will be added for vectorized Rx/Tx burst, all the shareable functions which don't use any vector intrinsics need to be separated from architecture-dependent functions. All the vector functions for x86 SSE are moved to a new header file - mlx5_rxtx_vec_sse.h. And shareable common functions are now in mlx5_rxtx_vec.c. Signed-off-by: Yongseok Koh <yskoh@mellanox.com> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> 2017-10-09 18:46:57 +00:00			`*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);`
			`}`

net/mlx5: use static assert for compile-time sanity checks Replace compile-time sanity check with static_assert() as c11 standard has been set. Add mlx5_rxtx_vec.h and move the sanity checks to the file Signed-off-by: Yongseok Koh <yskoh@mellanox.com> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> 2017-10-09 18:46:56 +00:00			`#endif /* RTE_PMD_MLX5_RXTX_VEC_H_ */`