numam-dpdk/drivers/net/mlx5/mlx5_rxq.c

3279 lines
88 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2015 6WIND S.A.
* Copyright 2015 Mellanox Technologies, Ltd
*/
#include <stddef.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <sys/queue.h>
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <ethdev_driver.h>
#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_debug.h>
#include <rte_io.h>
#include <rte_eal_paging.h>
#include <mlx5_glue.h>
#include <mlx5_malloc.h>
common/mlx5: fix post doorbell barrier The rdma-core library can map doorbell register in two ways, depending on the environment variable "MLX5_SHUT_UP_BF": - as regular cached memory, the variable is either missing or set to zero. This type of mapping may cause the significant doorbell register writing latency and requires an explicit memory write barrier to mitigate this issue and prevent write combining. - as non-cached memory, the variable is present and set to not "0" value. This type of mapping may cause performance impact under heavy loading conditions but the explicit write memory barrier is not required and it may improve core performance. The UAR creation function maps a doorbell in one of the above ways according to the system. In run time, it always adds an explicit memory barrier after writing to. In cases where the doorbell was mapped as non-cached memory, the explicit memory barrier is unnecessary and may impair performance. The commit [1] solved this problem for a Tx queue. In run time, it checks the mapping type and provides the memory barrier after writing to a Tx doorbell register if it is needed. The mapping type is extracted directly from the uar_mmap_offset field in the queue properties. This patch shares this code between the drivers and extends the above solution for each of them. [1] commit 8409a28573d3 ("net/mlx5: control transmit doorbell register mapping") Fixes: f8c97babc9f4 ("compress/mlx5: add data-path functions") Fixes: 8e196c08ab53 ("crypto/mlx5: support enqueue/dequeue operations") Fixes: 4d4e245ad637 ("regex/mlx5: support enqueue") Cc: stable@dpdk.org Signed-off-by: Michael Baum <michaelba@nvidia.com> Reviewed-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com> Acked-by: Matan Azrad <matan@nvidia.com>
2021-11-03 18:35:13 +00:00
#include <mlx5_common.h>
#include <mlx5_common_mr.h>
#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_rx.h"
#include "mlx5_utils.h"
#include "mlx5_autoconf.h"
#include "mlx5_devx.h"
#include "rte_pmd_mlx5.h"
mlx5: refactor Rx code for the new verbs RSS API The new Verbs RSS API is lower-level than the previous one and much more flexible but requires RX queues to use Work Queues (WQs) internally instead of Queue Pairs (QPs), which are grouped in an indirection table used by a new kind of hash RX QPs. Hash RX QPs and the indirection table together replace the parent RSS QP while WQs are mostly similar to child QPs. RSS hash key is not configurable yet. Summary of changes: - Individual DPDK RX queues do not store flow properties anymore, this info is now part of the hash RX queues. - All functions affecting the parent queue when RSS is enabled or the basic queues otherwise are modified to affect hash RX queues instead. - Hash RX queues are also used when a single DPDK RX queue is configured (no RSS) to remove that special case. - Hash RX queues and indirection table are created/destroyed when device is started/stopped in addition to create/destroy flows. - Contrary to QPs, WQs are moved to the "ready" state before posting RX buffers, otherwise they are ignored. - Resource domain information is added to WQs for better performance. - CQs are not resized anymore when switching between non-SG and SG modes as it does not work correctly with WQs. Use the largest possible size instead, since CQ size does not have to be the same as the number of elements in the RX queue. This also applies to the maximum number of outstanding WRs in a WQ (max_recv_wr). Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com> Signed-off-by: Olga Shern <olgas@mellanox.com> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> Signed-off-by: Or Ami <ora@mellanox.com> Signed-off-by: Yaacov Hazan <yaacovh@mellanox.com>
2015-10-30 18:55:06 +00:00
/* Default RSS hash key also used for ConnectX-3. */
uint8_t rss_hash_default_key[] = {
mlx5: refactor Rx code for the new verbs RSS API The new Verbs RSS API is lower-level than the previous one and much more flexible but requires RX queues to use Work Queues (WQs) internally instead of Queue Pairs (QPs), which are grouped in an indirection table used by a new kind of hash RX QPs. Hash RX QPs and the indirection table together replace the parent RSS QP while WQs are mostly similar to child QPs. RSS hash key is not configurable yet. Summary of changes: - Individual DPDK RX queues do not store flow properties anymore, this info is now part of the hash RX queues. - All functions affecting the parent queue when RSS is enabled or the basic queues otherwise are modified to affect hash RX queues instead. - Hash RX queues are also used when a single DPDK RX queue is configured (no RSS) to remove that special case. - Hash RX queues and indirection table are created/destroyed when device is started/stopped in addition to create/destroy flows. - Contrary to QPs, WQs are moved to the "ready" state before posting RX buffers, otherwise they are ignored. - Resource domain information is added to WQs for better performance. - CQs are not resized anymore when switching between non-SG and SG modes as it does not work correctly with WQs. Use the largest possible size instead, since CQ size does not have to be the same as the number of elements in the RX queue. This also applies to the maximum number of outstanding WRs in a WQ (max_recv_wr). Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com> Signed-off-by: Olga Shern <olgas@mellanox.com> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> Signed-off-by: Or Ami <ora@mellanox.com> Signed-off-by: Yaacov Hazan <yaacovh@mellanox.com>
2015-10-30 18:55:06 +00:00
0x2c, 0xc6, 0x81, 0xd1,
0x5b, 0xdb, 0xf4, 0xf7,
0xfc, 0xa2, 0x83, 0x19,
0xdb, 0x1a, 0x3e, 0x94,
0x6b, 0x9e, 0x38, 0xd9,
0x2c, 0x9c, 0x03, 0xd1,
0xad, 0x99, 0x44, 0xa7,
0xd9, 0x56, 0x3d, 0x59,
0x06, 0x3c, 0x25, 0xf3,
0xfc, 0x1f, 0xdc, 0x2a,
};
/* Length of the default RSS hash key. */
static_assert(MLX5_RSS_HASH_KEY_LEN ==
(unsigned int)sizeof(rss_hash_default_key),
"wrong RSS default key size.");
/**
* Calculate the number of CQEs in CQ for the Rx queue.
*
* @param rxq_data
* Pointer to receive queue structure.
*
* @return
* Number of CQEs in CQ.
*/
unsigned int
mlx5_rxq_cqe_num(struct mlx5_rxq_data *rxq_data)
{
unsigned int cqe_n;
unsigned int wqe_n = 1 << rxq_data->elts_n;
if (mlx5_rxq_mprq_enabled(rxq_data))
cqe_n = wqe_n * RTE_BIT32(rxq_data->log_strd_num) - 1;
else
cqe_n = wqe_n - 1;
return cqe_n;
}
/**
* Allocate RX queue elements for Multi-Packet RQ.
*
* @param rxq_ctrl
* Pointer to RX queue structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
{
struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
unsigned int wqe_n = 1 << rxq->elts_n;
unsigned int i;
int err;
/* Iterate on segments. */
for (i = 0; i <= wqe_n; ++i) {
struct mlx5_mprq_buf *buf;
if (rte_mempool_get(rxq->mprq_mp, (void **)&buf) < 0) {
DRV_LOG(ERR, "port %u empty mbuf pool", rxq->port_id);
rte_errno = ENOMEM;
goto error;
}
if (i < wqe_n)
(*rxq->mprq_bufs)[i] = buf;
else
rxq->mprq_repl = buf;
}
DRV_LOG(DEBUG,
"port %u MPRQ queue %u allocated and configured %u segments",
rxq->port_id, rxq->idx, wqe_n);
return 0;
error:
err = rte_errno; /* Save rte_errno before cleanup. */
wqe_n = i;
for (i = 0; (i != wqe_n); ++i) {
if ((*rxq->mprq_bufs)[i] != NULL)
rte_mempool_put(rxq->mprq_mp,
(*rxq->mprq_bufs)[i]);
(*rxq->mprq_bufs)[i] = NULL;
}
DRV_LOG(DEBUG, "port %u MPRQ queue %u failed, freed everything",
rxq->port_id, rxq->idx);
rte_errno = err; /* Restore rte_errno. */
return -rte_errno;
}
/**
* Allocate RX queue elements for Single-Packet RQ.
*
* @param rxq_ctrl
* Pointer to RX queue structure.
*
* @return
* 0 on success, negative errno value on failure.
*/
static int
rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
{
const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
unsigned int elts_n = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
RTE_BIT32(rxq_ctrl->rxq.elts_n) *
RTE_BIT32(rxq_ctrl->rxq.log_strd_num) :
RTE_BIT32(rxq_ctrl->rxq.elts_n);
bool has_vec_support = mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0;
unsigned int i;
int err;
/* Iterate on segments. */
for (i = 0; (i != elts_n); ++i) {
struct mlx5_eth_rxseg *seg = &rxq_ctrl->rxq.rxseg[i % sges_n];
struct rte_mbuf *buf;
buf = rte_pktmbuf_alloc(seg->mp);
if (buf == NULL) {
if (rxq_ctrl->share_group == 0)
DRV_LOG(ERR, "port %u queue %u empty mbuf pool",
RXQ_PORT_ID(rxq_ctrl),
rxq_ctrl->rxq.idx);
else
DRV_LOG(ERR, "share group %u queue %u empty mbuf pool",
rxq_ctrl->share_group,
rxq_ctrl->share_qid);
rte_errno = ENOMEM;
goto error;
}
/* Only vectored Rx routines rely on headroom size. */
MLX5_ASSERT(!has_vec_support ||
DATA_OFF(buf) >= RTE_PKTMBUF_HEADROOM);
/* Buffer is supposed to be empty. */
MLX5_ASSERT(rte_pktmbuf_data_len(buf) == 0);
MLX5_ASSERT(rte_pktmbuf_pkt_len(buf) == 0);
MLX5_ASSERT(!buf->next);
SET_DATA_OFF(buf, seg->offset);
PORT(buf) = rxq_ctrl->rxq.port_id;
DATA_LEN(buf) = seg->length;
PKT_LEN(buf) = seg->length;
NB_SEGS(buf) = 1;
(*rxq_ctrl->rxq.elts)[i] = buf;
}
/* If Rx vector is activated. */
if (has_vec_support) {
struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
struct rte_pktmbuf_pool_private *priv =
(struct rte_pktmbuf_pool_private *)
rte_mempool_get_priv(rxq_ctrl->rxq.mp);
int j;
/* Initialize default rearm_data for vPMD. */
mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
rte_mbuf_refcnt_set(mbuf_init, 1);
mbuf_init->nb_segs = 1;
/* For shared queues port is provided in CQE */
mbuf_init->port = rxq->shared ? 0 : rxq->port_id;
if (priv->flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF)
mbuf_init->ol_flags = RTE_MBUF_F_EXTERNAL;
/*
* prevent compiler reordering:
* rearm_data covers previous fields.
*/
rte_compiler_barrier();
rxq->mbuf_initializer =
*(rte_xmm_t *)&mbuf_init->rearm_data;
/* Padding with a fake mbuf for vectorized Rx. */
for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
(*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
}
if (rxq_ctrl->share_group == 0)
DRV_LOG(DEBUG,
"port %u SPRQ queue %u allocated and configured %u segments (max %u packets)",
RXQ_PORT_ID(rxq_ctrl), rxq_ctrl->rxq.idx, elts_n,
elts_n / (1 << rxq_ctrl->rxq.sges_n));
else
DRV_LOG(DEBUG,
"share group %u SPRQ queue %u allocated and configured %u segments (max %u packets)",
rxq_ctrl->share_group, rxq_ctrl->share_qid, elts_n,
elts_n / (1 << rxq_ctrl->rxq.sges_n));
return 0;
error:
err = rte_errno; /* Save rte_errno before cleanup. */
elts_n = i;
for (i = 0; (i != elts_n); ++i) {
if ((*rxq_ctrl->rxq.elts)[i] != NULL)
rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
(*rxq_ctrl->rxq.elts)[i] = NULL;
}
if (rxq_ctrl->share_group == 0)
DRV_LOG(DEBUG, "port %u SPRQ queue %u failed, freed everything",
RXQ_PORT_ID(rxq_ctrl), rxq_ctrl->rxq.idx);
else
DRV_LOG(DEBUG, "share group %u SPRQ queue %u failed, freed everything",
rxq_ctrl->share_group, rxq_ctrl->share_qid);
rte_errno = err; /* Restore rte_errno. */
return -rte_errno;
}
/**
* Allocate RX queue elements.
*
* @param rxq_ctrl
* Pointer to RX queue structure.
*
* @return
* 0 on success, negative errno value on failure.
*/
int
rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
{
int ret = 0;
/**
* For MPRQ we need to allocate both MPRQ buffers
* for WQEs and simple mbufs for vector processing.
*/
if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
ret = rxq_alloc_elts_mprq(rxq_ctrl);
if (ret == 0)
ret = rxq_alloc_elts_sprq(rxq_ctrl);
return ret;
}
/**
* Free RX queue elements for Multi-Packet RQ.
*
* @param rxq_ctrl
* Pointer to RX queue structure.
*/
static void
rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
{
struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
uint16_t i;
DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing %d WRs",
rxq->port_id, rxq->idx, (1u << rxq->elts_n));
if (rxq->mprq_bufs == NULL)
return;
for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
if ((*rxq->mprq_bufs)[i] != NULL)
mlx5_mprq_buf_free((*rxq->mprq_bufs)[i]);
(*rxq->mprq_bufs)[i] = NULL;
}
if (rxq->mprq_repl != NULL) {
mlx5_mprq_buf_free(rxq->mprq_repl);
rxq->mprq_repl = NULL;
}
}
/**
* Free RX queue elements for Single-Packet RQ.
*
* @param rxq_ctrl
* Pointer to RX queue structure.
*/
static void
rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
{
struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
const uint16_t q_n = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
RTE_BIT32(rxq->elts_n) * RTE_BIT32(rxq->log_strd_num) :
RTE_BIT32(rxq->elts_n);
const uint16_t q_mask = q_n - 1;
uint16_t elts_ci = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
rxq->elts_ci : rxq->rq_ci;
uint16_t used = q_n - (elts_ci - rxq->rq_pi);
uint16_t i;
if (rxq_ctrl->share_group == 0)
DRV_LOG(DEBUG, "port %u Rx queue %u freeing %d WRs",
RXQ_PORT_ID(rxq_ctrl), rxq->idx, q_n);
else
DRV_LOG(DEBUG, "share group %u Rx queue %u freeing %d WRs",
rxq_ctrl->share_group, rxq_ctrl->share_qid, q_n);
if (rxq->elts == NULL)
return;
/**
* Some mbuf in the Ring belongs to the application.
* They cannot be freed.
*/
if (mlx5_rxq_check_vec_support(rxq) > 0) {
for (i = 0; i < used; ++i)
(*rxq->elts)[(elts_ci + i) & q_mask] = NULL;
rxq->rq_pi = elts_ci;
}
for (i = 0; i != q_n; ++i) {
if ((*rxq->elts)[i] != NULL)
rte_pktmbuf_free_seg((*rxq->elts)[i]);
(*rxq->elts)[i] = NULL;
}
}
/**
* Free RX queue elements.
*
* @param rxq_ctrl
* Pointer to RX queue structure.
*/
static void
rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
{
/*
* For MPRQ we need to allocate both MPRQ buffers
* for WQEs and simple mbufs for vector processing.
*/
if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
rxq_free_elts_mprq(rxq_ctrl);
rxq_free_elts_sprq(rxq_ctrl);
}
/**
* Returns the per-queue supported offloads.
*
* @param dev
* Pointer to Ethernet device.
*
* @return
* Supported Rx offloads.
*/
uint64_t
mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
uint64_t offloads = (RTE_ETH_RX_OFFLOAD_SCATTER |
RTE_ETH_RX_OFFLOAD_TIMESTAMP |
RTE_ETH_RX_OFFLOAD_RSS_HASH);
if (!priv->config.mprq.enabled)
offloads |= RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT;
if (priv->sh->config.hw_fcs_strip)
offloads |= RTE_ETH_RX_OFFLOAD_KEEP_CRC;
if (priv->sh->dev_cap.hw_csum)
offloads |= (RTE_ETH_RX_OFFLOAD_IPV4_CKSUM |
RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
RTE_ETH_RX_OFFLOAD_TCP_CKSUM);
if (priv->sh->dev_cap.hw_vlan_strip)
offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
if (priv->sh->dev_cap.lro_supported)
offloads |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
return offloads;
}
/**
* Returns the per-port supported offloads.
*
* @return
* Supported Rx offloads.
*/
uint64_t
mlx5_get_rx_port_offloads(void)
{
uint64_t offloads = RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
return offloads;
}
/**
* Verify if the queue can be released.
*
* @param dev
* Pointer to Ethernet device.
* @param idx
* RX queue index.
*
* @return
* 1 if the queue can be released
* 0 if the queue can not be released, there are references to it.
* Negative errno and rte_errno is set if queue doesn't exist.
*/
static int
mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx)
{
struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx);
if (rxq == NULL) {
rte_errno = EINVAL;
return -rte_errno;
}
return (__atomic_load_n(&rxq->refcnt, __ATOMIC_RELAXED) == 1);
}
net/mlx5: add queue start and stop The mlx5 PMD did not support queue_start and queue_stop eth_dev API routines, queue could not be suspended and resumed during device operation. There is the use case when this feature is crucial for applications: - there is the secondary process handling the queue - secondary process crashed/aborted - some mbufs were allocated or used by secondary application - some mbufs were allocated by Rx queues to receive packets - some mbufs were placed to send queue - queue goes to undefined state In this case there was no reliable way to recovery queue handling by restarted secondary process but reset queue to initial state freeing all involved resources, including buffers involved in queue operations, reset the mbuf pools, and then reinitialize queue to working state: - reset mbuf pool, allocate all mbuf to initialize pool into safe state after the crush and allow safe mbuf free calls - stop queue, free all potentially involved mbufs - reset mbuf pool again - start queue, reallocate mbufs needed This patch introduces the queue start/stop feature with some limitations: - hairpin queues are not supported - it is application responsibility to synchronize start/stop with datapath routines, rx/tx_burst must be suspended during the queue_start/queue_stop calls - it is application responsibility to track queue usage and provide coordinated queue_start/queue_stop calls from secondary and primary processes. - Rx queues with vectorized Rx routine and engaged CQE compression are not supported by this patch currently Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
2020-07-19 15:35:37 +00:00
/* Fetches and drops all SW-owned and error CQEs to synchronize CQ. */
static void
rxq_sync_cq(struct mlx5_rxq_data *rxq)
{
const uint16_t cqe_n = 1 << rxq->cqe_n;
const uint16_t cqe_mask = cqe_n - 1;
volatile struct mlx5_cqe *cqe;
int ret, i;
i = cqe_n;
do {
cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask];
ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
if (ret == MLX5_CQE_STATUS_HW_OWN)
break;
if (ret == MLX5_CQE_STATUS_ERR) {
rxq->cq_ci++;
continue;
}
MLX5_ASSERT(ret == MLX5_CQE_STATUS_SW_OWN);
if (MLX5_CQE_FORMAT(cqe->op_own) != MLX5_COMPRESSED) {
rxq->cq_ci++;
continue;
}
/* Compute the next non compressed CQE. */
rxq->cq_ci += rte_be_to_cpu_32(cqe->byte_cnt);
} while (--i);
/* Move all CQEs to HW ownership, including possible MiniCQEs. */
for (i = 0; i < cqe_n; i++) {
cqe = &(*rxq->cqes)[i];
cqe->op_own = MLX5_CQE_INVALIDATE;
}
/* Resync CQE and WQE (WQ in RESET state). */
rte_io_wmb();
net/mlx5: add queue start and stop The mlx5 PMD did not support queue_start and queue_stop eth_dev API routines, queue could not be suspended and resumed during device operation. There is the use case when this feature is crucial for applications: - there is the secondary process handling the queue - secondary process crashed/aborted - some mbufs were allocated or used by secondary application - some mbufs were allocated by Rx queues to receive packets - some mbufs were placed to send queue - queue goes to undefined state In this case there was no reliable way to recovery queue handling by restarted secondary process but reset queue to initial state freeing all involved resources, including buffers involved in queue operations, reset the mbuf pools, and then reinitialize queue to working state: - reset mbuf pool, allocate all mbuf to initialize pool into safe state after the crush and allow safe mbuf free calls - stop queue, free all potentially involved mbufs - reset mbuf pool again - start queue, reallocate mbufs needed This patch introduces the queue start/stop feature with some limitations: - hairpin queues are not supported - it is application responsibility to synchronize start/stop with datapath routines, rx/tx_burst must be suspended during the queue_start/queue_stop calls - it is application responsibility to track queue usage and provide coordinated queue_start/queue_stop calls from secondary and primary processes. - Rx queues with vectorized Rx routine and engaged CQE compression are not supported by this patch currently Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
2020-07-19 15:35:37 +00:00
*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
rte_io_wmb();
net/mlx5: add queue start and stop The mlx5 PMD did not support queue_start and queue_stop eth_dev API routines, queue could not be suspended and resumed during device operation. There is the use case when this feature is crucial for applications: - there is the secondary process handling the queue - secondary process crashed/aborted - some mbufs were allocated or used by secondary application - some mbufs were allocated by Rx queues to receive packets - some mbufs were placed to send queue - queue goes to undefined state In this case there was no reliable way to recovery queue handling by restarted secondary process but reset queue to initial state freeing all involved resources, including buffers involved in queue operations, reset the mbuf pools, and then reinitialize queue to working state: - reset mbuf pool, allocate all mbuf to initialize pool into safe state after the crush and allow safe mbuf free calls - stop queue, free all potentially involved mbufs - reset mbuf pool again - start queue, reallocate mbufs needed This patch introduces the queue start/stop feature with some limitations: - hairpin queues are not supported - it is application responsibility to synchronize start/stop with datapath routines, rx/tx_burst must be suspended during the queue_start/queue_stop calls - it is application responsibility to track queue usage and provide coordinated queue_start/queue_stop calls from secondary and primary processes. - Rx queues with vectorized Rx routine and engaged CQE compression are not supported by this patch currently Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
2020-07-19 15:35:37 +00:00
*rxq->rq_db = rte_cpu_to_be_32(0);
rte_io_wmb();
net/mlx5: add queue start and stop The mlx5 PMD did not support queue_start and queue_stop eth_dev API routines, queue could not be suspended and resumed during device operation. There is the use case when this feature is crucial for applications: - there is the secondary process handling the queue - secondary process crashed/aborted - some mbufs were allocated or used by secondary application - some mbufs were allocated by Rx queues to receive packets - some mbufs were placed to send queue - queue goes to undefined state In this case there was no reliable way to recovery queue handling by restarted secondary process but reset queue to initial state freeing all involved resources, including buffers involved in queue operations, reset the mbuf pools, and then reinitialize queue to working state: - reset mbuf pool, allocate all mbuf to initialize pool into safe state after the crush and allow safe mbuf free calls - stop queue, free all potentially involved mbufs - reset mbuf pool again - start queue, reallocate mbufs needed This patch introduces the queue start/stop feature with some limitations: - hairpin queues are not supported - it is application responsibility to synchronize start/stop with datapath routines, rx/tx_burst must be suspended during the queue_start/queue_stop calls - it is application responsibility to track queue usage and provide coordinated queue_start/queue_stop calls from secondary and primary processes. - Rx queues with vectorized Rx routine and engaged CQE compression are not supported by this patch currently Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
2020-07-19 15:35:37 +00:00
}
/**
* Rx queue stop. Device queue goes to the RESET state,
* all involved mbufs are freed from WQ.
*
* @param dev
* Pointer to Ethernet device structure.
* @param idx
* RX queue index.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_rx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t idx)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx);
struct mlx5_rxq_ctrl *rxq_ctrl = rxq->ctrl;
net/mlx5: add queue start and stop The mlx5 PMD did not support queue_start and queue_stop eth_dev API routines, queue could not be suspended and resumed during device operation. There is the use case when this feature is crucial for applications: - there is the secondary process handling the queue - secondary process crashed/aborted - some mbufs were allocated or used by secondary application - some mbufs were allocated by Rx queues to receive packets - some mbufs were placed to send queue - queue goes to undefined state In this case there was no reliable way to recovery queue handling by restarted secondary process but reset queue to initial state freeing all involved resources, including buffers involved in queue operations, reset the mbuf pools, and then reinitialize queue to working state: - reset mbuf pool, allocate all mbuf to initialize pool into safe state after the crush and allow safe mbuf free calls - stop queue, free all potentially involved mbufs - reset mbuf pool again - start queue, reallocate mbufs needed This patch introduces the queue start/stop feature with some limitations: - hairpin queues are not supported - it is application responsibility to synchronize start/stop with datapath routines, rx/tx_burst must be suspended during the queue_start/queue_stop calls - it is application responsibility to track queue usage and provide coordinated queue_start/queue_stop calls from secondary and primary processes. - Rx queues with vectorized Rx routine and engaged CQE compression are not supported by this patch currently Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
2020-07-19 15:35:37 +00:00
int ret;
MLX5_ASSERT(rxq != NULL && rxq_ctrl != NULL);
net/mlx5: add queue start and stop The mlx5 PMD did not support queue_start and queue_stop eth_dev API routines, queue could not be suspended and resumed during device operation. There is the use case when this feature is crucial for applications: - there is the secondary process handling the queue - secondary process crashed/aborted - some mbufs were allocated or used by secondary application - some mbufs were allocated by Rx queues to receive packets - some mbufs were placed to send queue - queue goes to undefined state In this case there was no reliable way to recovery queue handling by restarted secondary process but reset queue to initial state freeing all involved resources, including buffers involved in queue operations, reset the mbuf pools, and then reinitialize queue to working state: - reset mbuf pool, allocate all mbuf to initialize pool into safe state after the crush and allow safe mbuf free calls - stop queue, free all potentially involved mbufs - reset mbuf pool again - start queue, reallocate mbufs needed This patch introduces the queue start/stop feature with some limitations: - hairpin queues are not supported - it is application responsibility to synchronize start/stop with datapath routines, rx/tx_burst must be suspended during the queue_start/queue_stop calls - it is application responsibility to track queue usage and provide coordinated queue_start/queue_stop calls from secondary and primary processes. - Rx queues with vectorized Rx routine and engaged CQE compression are not supported by this patch currently Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
2020-07-19 15:35:37 +00:00
MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
ret = priv->obj_ops.rxq_obj_modify(rxq, MLX5_RXQ_MOD_RDY2RST);
net/mlx5: add queue start and stop The mlx5 PMD did not support queue_start and queue_stop eth_dev API routines, queue could not be suspended and resumed during device operation. There is the use case when this feature is crucial for applications: - there is the secondary process handling the queue - secondary process crashed/aborted - some mbufs were allocated or used by secondary application - some mbufs were allocated by Rx queues to receive packets - some mbufs were placed to send queue - queue goes to undefined state In this case there was no reliable way to recovery queue handling by restarted secondary process but reset queue to initial state freeing all involved resources, including buffers involved in queue operations, reset the mbuf pools, and then reinitialize queue to working state: - reset mbuf pool, allocate all mbuf to initialize pool into safe state after the crush and allow safe mbuf free calls - stop queue, free all potentially involved mbufs - reset mbuf pool again - start queue, reallocate mbufs needed This patch introduces the queue start/stop feature with some limitations: - hairpin queues are not supported - it is application responsibility to synchronize start/stop with datapath routines, rx/tx_burst must be suspended during the queue_start/queue_stop calls - it is application responsibility to track queue usage and provide coordinated queue_start/queue_stop calls from secondary and primary processes. - Rx queues with vectorized Rx routine and engaged CQE compression are not supported by this patch currently Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
2020-07-19 15:35:37 +00:00
if (ret) {
DRV_LOG(ERR, "Cannot change Rx WQ state to RESET: %s",
strerror(errno));
rte_errno = errno;
return ret;
}
/* Remove all processes CQEs. */
rxq_sync_cq(&rxq_ctrl->rxq);
net/mlx5: add queue start and stop The mlx5 PMD did not support queue_start and queue_stop eth_dev API routines, queue could not be suspended and resumed during device operation. There is the use case when this feature is crucial for applications: - there is the secondary process handling the queue - secondary process crashed/aborted - some mbufs were allocated or used by secondary application - some mbufs were allocated by Rx queues to receive packets - some mbufs were placed to send queue - queue goes to undefined state In this case there was no reliable way to recovery queue handling by restarted secondary process but reset queue to initial state freeing all involved resources, including buffers involved in queue operations, reset the mbuf pools, and then reinitialize queue to working state: - reset mbuf pool, allocate all mbuf to initialize pool into safe state after the crush and allow safe mbuf free calls - stop queue, free all potentially involved mbufs - reset mbuf pool again - start queue, reallocate mbufs needed This patch introduces the queue start/stop feature with some limitations: - hairpin queues are not supported - it is application responsibility to synchronize start/stop with datapath routines, rx/tx_burst must be suspended during the queue_start/queue_stop calls - it is application responsibility to track queue usage and provide coordinated queue_start/queue_stop calls from secondary and primary processes. - Rx queues with vectorized Rx routine and engaged CQE compression are not supported by this patch currently Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
2020-07-19 15:35:37 +00:00
/* Free all involved mbufs. */
rxq_free_elts(rxq_ctrl);
/* Set the actual queue state. */
dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STOPPED;
return 0;
}
/**
* Rx queue stop. Device queue goes to the RESET state,
* all involved mbufs are freed from WQ.
*
* @param dev
* Pointer to Ethernet device structure.
* @param idx
* RX queue index.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_rx_queue_stop(struct rte_eth_dev *dev, uint16_t idx)
{
eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
int ret;
if (rte_eth_dev_is_rx_hairpin_queue(dev, idx)) {
net/mlx5: add queue start and stop The mlx5 PMD did not support queue_start and queue_stop eth_dev API routines, queue could not be suspended and resumed during device operation. There is the use case when this feature is crucial for applications: - there is the secondary process handling the queue - secondary process crashed/aborted - some mbufs were allocated or used by secondary application - some mbufs were allocated by Rx queues to receive packets - some mbufs were placed to send queue - queue goes to undefined state In this case there was no reliable way to recovery queue handling by restarted secondary process but reset queue to initial state freeing all involved resources, including buffers involved in queue operations, reset the mbuf pools, and then reinitialize queue to working state: - reset mbuf pool, allocate all mbuf to initialize pool into safe state after the crush and allow safe mbuf free calls - stop queue, free all potentially involved mbufs - reset mbuf pool again - start queue, reallocate mbufs needed This patch introduces the queue start/stop feature with some limitations: - hairpin queues are not supported - it is application responsibility to synchronize start/stop with datapath routines, rx/tx_burst must be suspended during the queue_start/queue_stop calls - it is application responsibility to track queue usage and provide coordinated queue_start/queue_stop calls from secondary and primary processes. - Rx queues with vectorized Rx routine and engaged CQE compression are not supported by this patch currently Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
2020-07-19 15:35:37 +00:00
DRV_LOG(ERR, "Hairpin queue can't be stopped");
rte_errno = EINVAL;
return -EINVAL;
}
if (dev->data->rx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STOPPED)
return 0;
/*
* Vectorized Rx burst requires the CQ and RQ indices
* synchronized, that might be broken on RQ restart
* and cause Rx malfunction, so queue stopping is
* not supported if vectorized Rx burst is engaged.
* The routine pointer depends on the process
* type, should perform check there.
*/
if (pkt_burst == mlx5_rx_burst_vec) {
net/mlx5: add queue start and stop The mlx5 PMD did not support queue_start and queue_stop eth_dev API routines, queue could not be suspended and resumed during device operation. There is the use case when this feature is crucial for applications: - there is the secondary process handling the queue - secondary process crashed/aborted - some mbufs were allocated or used by secondary application - some mbufs were allocated by Rx queues to receive packets - some mbufs were placed to send queue - queue goes to undefined state In this case there was no reliable way to recovery queue handling by restarted secondary process but reset queue to initial state freeing all involved resources, including buffers involved in queue operations, reset the mbuf pools, and then reinitialize queue to working state: - reset mbuf pool, allocate all mbuf to initialize pool into safe state after the crush and allow safe mbuf free calls - stop queue, free all potentially involved mbufs - reset mbuf pool again - start queue, reallocate mbufs needed This patch introduces the queue start/stop feature with some limitations: - hairpin queues are not supported - it is application responsibility to synchronize start/stop with datapath routines, rx/tx_burst must be suspended during the queue_start/queue_stop calls - it is application responsibility to track queue usage and provide coordinated queue_start/queue_stop calls from secondary and primary processes. - Rx queues with vectorized Rx routine and engaged CQE compression are not supported by this patch currently Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
2020-07-19 15:35:37 +00:00
DRV_LOG(ERR, "Rx queue stop is not supported "
"for vectorized Rx");
rte_errno = EINVAL;
return -EINVAL;
}
if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
ret = mlx5_mp_os_req_queue_control(dev, idx,
MLX5_MP_REQ_QUEUE_RX_STOP);
} else {
ret = mlx5_rx_queue_stop_primary(dev, idx);
}
return ret;
}
/**
* Rx queue start. Device queue goes to the ready state,
* all required mbufs are allocated and WQ is replenished.
*
* @param dev
* Pointer to Ethernet device structure.
* @param idx
* RX queue index.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_rx_queue_start_primary(struct rte_eth_dev *dev, uint16_t idx)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx);
struct mlx5_rxq_data *rxq_data = &rxq->ctrl->rxq;
net/mlx5: add queue start and stop The mlx5 PMD did not support queue_start and queue_stop eth_dev API routines, queue could not be suspended and resumed during device operation. There is the use case when this feature is crucial for applications: - there is the secondary process handling the queue - secondary process crashed/aborted - some mbufs were allocated or used by secondary application - some mbufs were allocated by Rx queues to receive packets - some mbufs were placed to send queue - queue goes to undefined state In this case there was no reliable way to recovery queue handling by restarted secondary process but reset queue to initial state freeing all involved resources, including buffers involved in queue operations, reset the mbuf pools, and then reinitialize queue to working state: - reset mbuf pool, allocate all mbuf to initialize pool into safe state after the crush and allow safe mbuf free calls - stop queue, free all potentially involved mbufs - reset mbuf pool again - start queue, reallocate mbufs needed This patch introduces the queue start/stop feature with some limitations: - hairpin queues are not supported - it is application responsibility to synchronize start/stop with datapath routines, rx/tx_burst must be suspended during the queue_start/queue_stop calls - it is application responsibility to track queue usage and provide coordinated queue_start/queue_stop calls from secondary and primary processes. - Rx queues with vectorized Rx routine and engaged CQE compression are not supported by this patch currently Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
2020-07-19 15:35:37 +00:00
int ret;
MLX5_ASSERT(rxq != NULL && rxq->ctrl != NULL);
MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
net/mlx5: add queue start and stop The mlx5 PMD did not support queue_start and queue_stop eth_dev API routines, queue could not be suspended and resumed during device operation. There is the use case when this feature is crucial for applications: - there is the secondary process handling the queue - secondary process crashed/aborted - some mbufs were allocated or used by secondary application - some mbufs were allocated by Rx queues to receive packets - some mbufs were placed to send queue - queue goes to undefined state In this case there was no reliable way to recovery queue handling by restarted secondary process but reset queue to initial state freeing all involved resources, including buffers involved in queue operations, reset the mbuf pools, and then reinitialize queue to working state: - reset mbuf pool, allocate all mbuf to initialize pool into safe state after the crush and allow safe mbuf free calls - stop queue, free all potentially involved mbufs - reset mbuf pool again - start queue, reallocate mbufs needed This patch introduces the queue start/stop feature with some limitations: - hairpin queues are not supported - it is application responsibility to synchronize start/stop with datapath routines, rx/tx_burst must be suspended during the queue_start/queue_stop calls - it is application responsibility to track queue usage and provide coordinated queue_start/queue_stop calls from secondary and primary processes. - Rx queues with vectorized Rx routine and engaged CQE compression are not supported by this patch currently Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
2020-07-19 15:35:37 +00:00
/* Allocate needed buffers. */
ret = rxq_alloc_elts(rxq->ctrl);
net/mlx5: add queue start and stop The mlx5 PMD did not support queue_start and queue_stop eth_dev API routines, queue could not be suspended and resumed during device operation. There is the use case when this feature is crucial for applications: - there is the secondary process handling the queue - secondary process crashed/aborted - some mbufs were allocated or used by secondary application - some mbufs were allocated by Rx queues to receive packets - some mbufs were placed to send queue - queue goes to undefined state In this case there was no reliable way to recovery queue handling by restarted secondary process but reset queue to initial state freeing all involved resources, including buffers involved in queue operations, reset the mbuf pools, and then reinitialize queue to working state: - reset mbuf pool, allocate all mbuf to initialize pool into safe state after the crush and allow safe mbuf free calls - stop queue, free all potentially involved mbufs - reset mbuf pool again - start queue, reallocate mbufs needed This patch introduces the queue start/stop feature with some limitations: - hairpin queues are not supported - it is application responsibility to synchronize start/stop with datapath routines, rx/tx_burst must be suspended during the queue_start/queue_stop calls - it is application responsibility to track queue usage and provide coordinated queue_start/queue_stop calls from secondary and primary processes. - Rx queues with vectorized Rx routine and engaged CQE compression are not supported by this patch currently Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
2020-07-19 15:35:37 +00:00
if (ret) {
DRV_LOG(ERR, "Cannot reallocate buffers for Rx WQ");
rte_errno = errno;
return ret;
}
rte_io_wmb();
*rxq_data->cq_db = rte_cpu_to_be_32(rxq_data->cq_ci);
rte_io_wmb();
/* Reset RQ consumer before moving queue to READY state. */
*rxq_data->rq_db = rte_cpu_to_be_32(0);
rte_io_wmb();
ret = priv->obj_ops.rxq_obj_modify(rxq, MLX5_RXQ_MOD_RST2RDY);
net/mlx5: add queue start and stop The mlx5 PMD did not support queue_start and queue_stop eth_dev API routines, queue could not be suspended and resumed during device operation. There is the use case when this feature is crucial for applications: - there is the secondary process handling the queue - secondary process crashed/aborted - some mbufs were allocated or used by secondary application - some mbufs were allocated by Rx queues to receive packets - some mbufs were placed to send queue - queue goes to undefined state In this case there was no reliable way to recovery queue handling by restarted secondary process but reset queue to initial state freeing all involved resources, including buffers involved in queue operations, reset the mbuf pools, and then reinitialize queue to working state: - reset mbuf pool, allocate all mbuf to initialize pool into safe state after the crush and allow safe mbuf free calls - stop queue, free all potentially involved mbufs - reset mbuf pool again - start queue, reallocate mbufs needed This patch introduces the queue start/stop feature with some limitations: - hairpin queues are not supported - it is application responsibility to synchronize start/stop with datapath routines, rx/tx_burst must be suspended during the queue_start/queue_stop calls - it is application responsibility to track queue usage and provide coordinated queue_start/queue_stop calls from secondary and primary processes. - Rx queues with vectorized Rx routine and engaged CQE compression are not supported by this patch currently Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
2020-07-19 15:35:37 +00:00
if (ret) {
DRV_LOG(ERR, "Cannot change Rx WQ state to READY: %s",
strerror(errno));
rte_errno = errno;
return ret;
}
/* Reinitialize RQ - set WQEs. */
mlx5_rxq_initialize(rxq_data);
rxq_data->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
net/mlx5: add queue start and stop The mlx5 PMD did not support queue_start and queue_stop eth_dev API routines, queue could not be suspended and resumed during device operation. There is the use case when this feature is crucial for applications: - there is the secondary process handling the queue - secondary process crashed/aborted - some mbufs were allocated or used by secondary application - some mbufs were allocated by Rx queues to receive packets - some mbufs were placed to send queue - queue goes to undefined state In this case there was no reliable way to recovery queue handling by restarted secondary process but reset queue to initial state freeing all involved resources, including buffers involved in queue operations, reset the mbuf pools, and then reinitialize queue to working state: - reset mbuf pool, allocate all mbuf to initialize pool into safe state after the crush and allow safe mbuf free calls - stop queue, free all potentially involved mbufs - reset mbuf pool again - start queue, reallocate mbufs needed This patch introduces the queue start/stop feature with some limitations: - hairpin queues are not supported - it is application responsibility to synchronize start/stop with datapath routines, rx/tx_burst must be suspended during the queue_start/queue_stop calls - it is application responsibility to track queue usage and provide coordinated queue_start/queue_stop calls from secondary and primary processes. - Rx queues with vectorized Rx routine and engaged CQE compression are not supported by this patch currently Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
2020-07-19 15:35:37 +00:00
/* Set actual queue state. */
dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
return 0;
}
/**
* Rx queue start. Device queue goes to the ready state,
* all required mbufs are allocated and WQ is replenished.
*
* @param dev
* Pointer to Ethernet device structure.
* @param idx
* RX queue index.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_rx_queue_start(struct rte_eth_dev *dev, uint16_t idx)
{
int ret;
if (rte_eth_dev_is_rx_hairpin_queue(dev, idx)) {
net/mlx5: add queue start and stop The mlx5 PMD did not support queue_start and queue_stop eth_dev API routines, queue could not be suspended and resumed during device operation. There is the use case when this feature is crucial for applications: - there is the secondary process handling the queue - secondary process crashed/aborted - some mbufs were allocated or used by secondary application - some mbufs were allocated by Rx queues to receive packets - some mbufs were placed to send queue - queue goes to undefined state In this case there was no reliable way to recovery queue handling by restarted secondary process but reset queue to initial state freeing all involved resources, including buffers involved in queue operations, reset the mbuf pools, and then reinitialize queue to working state: - reset mbuf pool, allocate all mbuf to initialize pool into safe state after the crush and allow safe mbuf free calls - stop queue, free all potentially involved mbufs - reset mbuf pool again - start queue, reallocate mbufs needed This patch introduces the queue start/stop feature with some limitations: - hairpin queues are not supported - it is application responsibility to synchronize start/stop with datapath routines, rx/tx_burst must be suspended during the queue_start/queue_stop calls - it is application responsibility to track queue usage and provide coordinated queue_start/queue_stop calls from secondary and primary processes. - Rx queues with vectorized Rx routine and engaged CQE compression are not supported by this patch currently Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
2020-07-19 15:35:37 +00:00
DRV_LOG(ERR, "Hairpin queue can't be started");
rte_errno = EINVAL;
return -EINVAL;
}
if (dev->data->rx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STARTED)
return 0;
if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
ret = mlx5_mp_os_req_queue_control(dev, idx,
MLX5_MP_REQ_QUEUE_RX_START);
} else {
ret = mlx5_rx_queue_start_primary(dev, idx);
}
return ret;
}
/**
* Rx queue presetup checks.
*
* @param dev
* Pointer to Ethernet device structure.
* @param idx
* RX queue index.
* @param desc
* Number of descriptors to configure in queue.
* @param[out] rxq_ctrl
* Address of pointer to shared Rx queue control.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc,
struct mlx5_rxq_ctrl **rxq_ctrl)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_rxq_priv *rxq;
bool empty;
if (!rte_is_power_of_2(*desc)) {
*desc = 1 << log2above(*desc);
DRV_LOG(WARNING,
"port %u increased number of descriptors in Rx queue %u"
" to the next power of two (%d)",
dev->data->port_id, idx, *desc);
}
DRV_LOG(DEBUG, "port %u configuring Rx queue %u for %u descriptors",
dev->data->port_id, idx, *desc);
if (idx >= priv->rxqs_n) {
DRV_LOG(ERR, "port %u Rx queue index out of range (%u >= %u)",
dev->data->port_id, idx, priv->rxqs_n);
rte_errno = EOVERFLOW;
return -rte_errno;
}
if (rxq_ctrl == NULL || *rxq_ctrl == NULL)
return 0;
if (!(*rxq_ctrl)->rxq.shared) {
if (!mlx5_rxq_releasable(dev, idx)) {
DRV_LOG(ERR, "port %u unable to release queue index %u",
dev->data->port_id, idx);
rte_errno = EBUSY;
return -rte_errno;
}
mlx5_rxq_release(dev, idx);
} else {
if ((*rxq_ctrl)->obj != NULL)
/* Some port using shared Rx queue has been started. */
return 0;
/* Release all owner RxQ to reconfigure Shared RxQ. */
do {
rxq = LIST_FIRST(&(*rxq_ctrl)->owners);
LIST_REMOVE(rxq, owner_entry);
empty = LIST_EMPTY(&(*rxq_ctrl)->owners);
mlx5_rxq_release(ETH_DEV(rxq->priv), rxq->idx);
} while (!empty);
*rxq_ctrl = NULL;
}
return 0;
}
/**
* Get the shared Rx queue object that matches group and queue index.
*
* @param dev
* Pointer to Ethernet device structure.
* @param group
* Shared RXQ group.
* @param share_qid
* Shared RX queue index.
*
* @return
* Shared RXQ object that matching, or NULL if not found.
*/
static struct mlx5_rxq_ctrl *
mlx5_shared_rxq_get(struct rte_eth_dev *dev, uint32_t group, uint16_t share_qid)
{
struct mlx5_rxq_ctrl *rxq_ctrl;
struct mlx5_priv *priv = dev->data->dev_private;
LIST_FOREACH(rxq_ctrl, &priv->sh->shared_rxqs, share_entry) {
if (rxq_ctrl->share_group == group &&
rxq_ctrl->share_qid == share_qid)
return rxq_ctrl;
}
return NULL;
}
/**
* Check whether requested Rx queue configuration matches shared RXQ.
*
* @param rxq_ctrl
* Pointer to shared RXQ.
* @param dev
* Pointer to Ethernet device structure.
* @param idx
* Queue index.
* @param desc
* Number of descriptors to configure in queue.
* @param socket
* NUMA socket on which memory must be allocated.
* @param[in] conf
* Thresholds parameters.
* @param mp
* Memory pool for buffer allocations.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static bool
mlx5_shared_rxq_match(struct mlx5_rxq_ctrl *rxq_ctrl, struct rte_eth_dev *dev,
uint16_t idx, uint16_t desc, unsigned int socket,
const struct rte_eth_rxconf *conf,
struct rte_mempool *mp)
{
struct mlx5_priv *spriv = LIST_FIRST(&rxq_ctrl->owners)->priv;
struct mlx5_priv *priv = dev->data->dev_private;
unsigned int i;
RTE_SET_USED(conf);
if (rxq_ctrl->socket != socket) {
DRV_LOG(ERR, "port %u queue index %u failed to join shared group: socket mismatch",
dev->data->port_id, idx);
return false;
}
if (rxq_ctrl->rxq.elts_n != log2above(desc)) {
DRV_LOG(ERR, "port %u queue index %u failed to join shared group: descriptor number mismatch",
dev->data->port_id, idx);
return false;
}
if (priv->mtu != spriv->mtu) {
DRV_LOG(ERR, "port %u queue index %u failed to join shared group: mtu mismatch",
dev->data->port_id, idx);
return false;
}
if (priv->dev_data->dev_conf.intr_conf.rxq !=
spriv->dev_data->dev_conf.intr_conf.rxq) {
DRV_LOG(ERR, "port %u queue index %u failed to join shared group: interrupt mismatch",
dev->data->port_id, idx);
return false;
}
if (mp != NULL && rxq_ctrl->rxq.mp != mp) {
DRV_LOG(ERR, "port %u queue index %u failed to join shared group: mempool mismatch",
dev->data->port_id, idx);
return false;
} else if (mp == NULL) {
if (conf->rx_nseg != rxq_ctrl->rxseg_n) {
DRV_LOG(ERR, "port %u queue index %u failed to join shared group: segment number mismatch",
dev->data->port_id, idx);
return false;
}
for (i = 0; i < conf->rx_nseg; i++) {
if (memcmp(&conf->rx_seg[i].split, &rxq_ctrl->rxseg[i],
sizeof(struct rte_eth_rxseg_split))) {
DRV_LOG(ERR, "port %u queue index %u failed to join shared group: segment %u configuration mismatch",
dev->data->port_id, idx, i);
return false;
}
}
}
if (priv->config.hw_padding != spriv->config.hw_padding) {
DRV_LOG(ERR, "port %u queue index %u failed to join shared group: padding mismatch",
dev->data->port_id, idx);
return false;
}
if (priv->config.cqe_comp != spriv->config.cqe_comp ||
(priv->config.cqe_comp &&
priv->config.cqe_comp_fmt != spriv->config.cqe_comp_fmt)) {
DRV_LOG(ERR, "port %u queue index %u failed to join shared group: CQE compression mismatch",
dev->data->port_id, idx);
return false;
}
return true;
}
/**
*
* @param dev
* Pointer to Ethernet device structure.
* @param idx
* RX queue index.
* @param desc
* Number of descriptors to configure in queue.
* @param socket
* NUMA socket on which memory must be allocated.
* @param[in] conf
* Thresholds parameters.
* @param mp
* Memory pool for buffer allocations.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
unsigned int socket, const struct rte_eth_rxconf *conf,
struct rte_mempool *mp)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_rxq_priv *rxq;
struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
struct rte_eth_rxseg_split *rx_seg =
(struct rte_eth_rxseg_split *)conf->rx_seg;
struct rte_eth_rxseg_split rx_single = {.mp = mp};
uint16_t n_seg = conf->rx_nseg;
int res;
uint64_t offloads = conf->offloads |
dev->data->dev_conf.rxmode.offloads;
bool is_extmem = false;
if (mp) {
/*
* The parameters should be checked on rte_eth_dev layer.
* If mp is specified it means the compatible configuration
* without buffer split feature tuning.
*/
rx_seg = &rx_single;
n_seg = 1;
is_extmem = rte_pktmbuf_priv_flags(mp) &
RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF;
}
if (n_seg > 1) {
/* The offloads should be checked on rte_eth_dev layer. */
MLX5_ASSERT(offloads & RTE_ETH_RX_OFFLOAD_SCATTER);
if (!(offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT)) {
DRV_LOG(ERR, "port %u queue index %u split "
"offload not configured",
dev->data->port_id, idx);
rte_errno = ENOSPC;
return -rte_errno;
}
MLX5_ASSERT(n_seg < MLX5_MAX_RXQ_NSEG);
}
if (conf->share_group > 0) {
if (!priv->sh->cdev->config.hca_attr.mem_rq_rmp) {
DRV_LOG(ERR, "port %u queue index %u shared Rx queue not supported by fw",
dev->data->port_id, idx);
rte_errno = EINVAL;
return -rte_errno;
}
if (priv->obj_ops.rxq_obj_new != devx_obj_ops.rxq_obj_new) {
DRV_LOG(ERR, "port %u queue index %u shared Rx queue needs DevX api",
dev->data->port_id, idx);
rte_errno = EINVAL;
return -rte_errno;
}
if (conf->share_qid >= priv->rxqs_n) {
DRV_LOG(ERR, "port %u shared Rx queue index %u > number of Rx queues %u",
dev->data->port_id, conf->share_qid,
priv->rxqs_n);
rte_errno = EINVAL;
return -rte_errno;
}
if (priv->config.mprq.enabled) {
DRV_LOG(ERR, "port %u shared Rx queue index %u: not supported when MPRQ enabled",
dev->data->port_id, conf->share_qid);
rte_errno = EINVAL;
return -rte_errno;
}
/* Try to reuse shared RXQ. */
rxq_ctrl = mlx5_shared_rxq_get(dev, conf->share_group,
conf->share_qid);
if (rxq_ctrl != NULL &&
!mlx5_shared_rxq_match(rxq_ctrl, dev, idx, desc, socket,
conf, mp)) {
rte_errno = EINVAL;
return -rte_errno;
}
}
res = mlx5_rx_queue_pre_setup(dev, idx, &desc, &rxq_ctrl);
if (res)
return res;
/* Allocate RXQ. */
rxq = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*rxq), 0,
SOCKET_ID_ANY);
if (!rxq) {
DRV_LOG(ERR, "port %u unable to allocate rx queue index %u private data",
dev->data->port_id, idx);
rte_errno = ENOMEM;
return -rte_errno;
}
if (rxq_ctrl == NULL) {
rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, rx_seg,
n_seg, is_extmem);
if (rxq_ctrl == NULL) {
DRV_LOG(ERR, "port %u unable to allocate rx queue index %u",
dev->data->port_id, idx);
mlx5_free(rxq);
rte_errno = ENOMEM;
return -rte_errno;
}
}
rxq->priv = priv;
rxq->idx = idx;
(*priv->rxq_privs)[idx] = rxq;
/* Join owner list. */
LIST_INSERT_HEAD(&rxq_ctrl->owners, rxq, owner_entry);
rxq->ctrl = rxq_ctrl;
mlx5_rxq_ref(dev, idx);
DRV_LOG(DEBUG, "port %u adding Rx queue %u to list",
dev->data->port_id, idx);
dev->data->rx_queues[idx] = &rxq_ctrl->rxq;
return 0;
}
/**
*
* @param dev
* Pointer to Ethernet device structure.
* @param idx
* RX queue index.
* @param desc
* Number of descriptors to configure in queue.
* @param hairpin_conf
* Hairpin configuration parameters.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_rx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
uint16_t desc,
const struct rte_eth_hairpin_conf *hairpin_conf)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_rxq_priv *rxq;
struct mlx5_rxq_ctrl *rxq_ctrl;
int res;
res = mlx5_rx_queue_pre_setup(dev, idx, &desc, NULL);
if (res)
return res;
if (hairpin_conf->peer_count != 1) {
rte_errno = EINVAL;
DRV_LOG(ERR, "port %u unable to setup Rx hairpin queue index %u"
" peer count is %u", dev->data->port_id,
idx, hairpin_conf->peer_count);
return -rte_errno;
}
if (hairpin_conf->peers[0].port == dev->data->port_id) {
if (hairpin_conf->peers[0].queue >= priv->txqs_n) {
rte_errno = EINVAL;
DRV_LOG(ERR, "port %u unable to setup Rx hairpin queue"
" index %u, Tx %u is larger than %u",
dev->data->port_id, idx,
hairpin_conf->peers[0].queue, priv->txqs_n);
return -rte_errno;
}
} else {
if (hairpin_conf->manual_bind == 0 ||
hairpin_conf->tx_explicit == 0) {
rte_errno = EINVAL;
DRV_LOG(ERR, "port %u unable to setup Rx hairpin queue"
" index %u peer port %u with attributes %u %u",
dev->data->port_id, idx,
hairpin_conf->peers[0].port,
hairpin_conf->manual_bind,
hairpin_conf->tx_explicit);
return -rte_errno;
}
}
rxq = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*rxq), 0,
SOCKET_ID_ANY);
if (!rxq) {
DRV_LOG(ERR, "port %u unable to allocate hairpin rx queue index %u private data",
dev->data->port_id, idx);
rte_errno = ENOMEM;
return -rte_errno;
}
rxq->priv = priv;
rxq->idx = idx;
(*priv->rxq_privs)[idx] = rxq;
rxq_ctrl = mlx5_rxq_hairpin_new(dev, rxq, desc, hairpin_conf);
if (!rxq_ctrl) {
DRV_LOG(ERR, "port %u unable to allocate hairpin queue index %u",
dev->data->port_id, idx);
mlx5_free(rxq);
(*priv->rxq_privs)[idx] = NULL;
rte_errno = ENOMEM;
return -rte_errno;
}
DRV_LOG(DEBUG, "port %u adding hairpin Rx queue %u to list",
dev->data->port_id, idx);
dev->data->rx_queues[idx] = &rxq_ctrl->rxq;
return 0;
}
/**
* DPDK callback to release a RX queue.
*
* @param dev
* Pointer to Ethernet device structure.
* @param qid
* Receive queue index.
*/
void
mlx5_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
struct mlx5_rxq_data *rxq = dev->data->rx_queues[qid];
if (rxq == NULL)
return;
if (!mlx5_rxq_releasable(dev, qid))
rte_panic("port %u Rx queue %u is still used by a flow and"
" cannot be removed\n", dev->data->port_id, qid);
mlx5_rxq_release(dev, qid);
}
/**
net/mlx5: fix Rx interrupts management This commit addresses various issues that may lead to undefined behavior when configuring Rx interrupts. While failure to create a Rx queue completion channel in rxq_ctrl_setup() prevents that queue from being created, existing queues still have theirs. Since the error handler disables dev_conf.intr_conf.rxq as well, subsequent calls to rxq_ctrl_setup() create Rx queues without interrupts. This leads to a scenario where not all Rx queues support interrupts; missing checks on the presence of completion channels may crash the application. Considering that the PMD is not supposed to disable user-provided configuration parameters (dev_conf.intr_conf.rxq), and that these can change for subsequent rxq_ctrl_setup() calls anyway, properly supporting a mixed mode where not all Rx queues have interrupts enabled is a better approach. To do so with a minimum set of changes, priv_intr_efd_enable() and priv_create_intr_vec() are first refactored as a single priv_rx_intr_vec_enable() function (same for their "disable" counterparts). Since they had to be used together, there was no point in keeping them separate. Remaining changes: - Always clean up before reconfiguring interrupts to avoid memory leaks. - Always clean up when closing the device. - Use malloc()/free() instead of their rte_*() counterparts since there is no need to store the vector in huge pages-backed memory. - Allow more Rx queues than the size of the event file descriptor array as long as Rx interrupts are not requested on all of them. - Properly clean up interrupt handle when disabling Rx interrupts (nb_efd and intr_vec reset to 0). - Check completion channel presence while toggling Rx interrupts on a given queue. Fixes: 3c7d44af252a ("net/mlx5: support user space Rx interrupt event") Cc: stable@dpdk.org Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
2017-06-14 11:49:17 +00:00
* Allocate queue vector and fill epoll fd list for Rx interrupts.
*
* @param dev
* Pointer to Ethernet device.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
unsigned int i;
unsigned int rxqs_n = priv->rxqs_n;
unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
net/mlx5: fix Rx interrupts management This commit addresses various issues that may lead to undefined behavior when configuring Rx interrupts. While failure to create a Rx queue completion channel in rxq_ctrl_setup() prevents that queue from being created, existing queues still have theirs. Since the error handler disables dev_conf.intr_conf.rxq as well, subsequent calls to rxq_ctrl_setup() create Rx queues without interrupts. This leads to a scenario where not all Rx queues support interrupts; missing checks on the presence of completion channels may crash the application. Considering that the PMD is not supposed to disable user-provided configuration parameters (dev_conf.intr_conf.rxq), and that these can change for subsequent rxq_ctrl_setup() calls anyway, properly supporting a mixed mode where not all Rx queues have interrupts enabled is a better approach. To do so with a minimum set of changes, priv_intr_efd_enable() and priv_create_intr_vec() are first refactored as a single priv_rx_intr_vec_enable() function (same for their "disable" counterparts). Since they had to be used together, there was no point in keeping them separate. Remaining changes: - Always clean up before reconfiguring interrupts to avoid memory leaks. - Always clean up when closing the device. - Use malloc()/free() instead of their rte_*() counterparts since there is no need to store the vector in huge pages-backed memory. - Allow more Rx queues than the size of the event file descriptor array as long as Rx interrupts are not requested on all of them. - Properly clean up interrupt handle when disabling Rx interrupts (nb_efd and intr_vec reset to 0). - Check completion channel presence while toggling Rx interrupts on a given queue. Fixes: 3c7d44af252a ("net/mlx5: support user space Rx interrupt event") Cc: stable@dpdk.org Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
2017-06-14 11:49:17 +00:00
unsigned int count = 0;
struct rte_intr_handle *intr_handle = dev->intr_handle;
if (!dev->data->dev_conf.intr_conf.rxq)
return 0;
mlx5_rx_intr_vec_disable(dev);
if (rte_intr_vec_list_alloc(intr_handle, NULL, n)) {
DRV_LOG(ERR,
"port %u failed to allocate memory for interrupt"
" vector, Rx interrupts will not be supported",
dev->data->port_id);
rte_errno = ENOMEM;
return -rte_errno;
}
if (rte_intr_type_set(intr_handle, RTE_INTR_HANDLE_EXT))
return -rte_errno;
for (i = 0; i != n; ++i) {
/* This rxq obj must not be released in this function. */
struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
struct mlx5_rxq_obj *rxq_obj = rxq ? rxq->ctrl->obj : NULL;
int rc;
net/mlx5: fix Rx interrupts management This commit addresses various issues that may lead to undefined behavior when configuring Rx interrupts. While failure to create a Rx queue completion channel in rxq_ctrl_setup() prevents that queue from being created, existing queues still have theirs. Since the error handler disables dev_conf.intr_conf.rxq as well, subsequent calls to rxq_ctrl_setup() create Rx queues without interrupts. This leads to a scenario where not all Rx queues support interrupts; missing checks on the presence of completion channels may crash the application. Considering that the PMD is not supposed to disable user-provided configuration parameters (dev_conf.intr_conf.rxq), and that these can change for subsequent rxq_ctrl_setup() calls anyway, properly supporting a mixed mode where not all Rx queues have interrupts enabled is a better approach. To do so with a minimum set of changes, priv_intr_efd_enable() and priv_create_intr_vec() are first refactored as a single priv_rx_intr_vec_enable() function (same for their "disable" counterparts). Since they had to be used together, there was no point in keeping them separate. Remaining changes: - Always clean up before reconfiguring interrupts to avoid memory leaks. - Always clean up when closing the device. - Use malloc()/free() instead of their rte_*() counterparts since there is no need to store the vector in huge pages-backed memory. - Allow more Rx queues than the size of the event file descriptor array as long as Rx interrupts are not requested on all of them. - Properly clean up interrupt handle when disabling Rx interrupts (nb_efd and intr_vec reset to 0). - Check completion channel presence while toggling Rx interrupts on a given queue. Fixes: 3c7d44af252a ("net/mlx5: support user space Rx interrupt event") Cc: stable@dpdk.org Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
2017-06-14 11:49:17 +00:00
/* Skip queues that cannot request interrupts. */
if (!rxq_obj || (!rxq_obj->ibv_channel &&
!rxq_obj->devx_channel)) {
net/mlx5: fix Rx interrupts management This commit addresses various issues that may lead to undefined behavior when configuring Rx interrupts. While failure to create a Rx queue completion channel in rxq_ctrl_setup() prevents that queue from being created, existing queues still have theirs. Since the error handler disables dev_conf.intr_conf.rxq as well, subsequent calls to rxq_ctrl_setup() create Rx queues without interrupts. This leads to a scenario where not all Rx queues support interrupts; missing checks on the presence of completion channels may crash the application. Considering that the PMD is not supposed to disable user-provided configuration parameters (dev_conf.intr_conf.rxq), and that these can change for subsequent rxq_ctrl_setup() calls anyway, properly supporting a mixed mode where not all Rx queues have interrupts enabled is a better approach. To do so with a minimum set of changes, priv_intr_efd_enable() and priv_create_intr_vec() are first refactored as a single priv_rx_intr_vec_enable() function (same for their "disable" counterparts). Since they had to be used together, there was no point in keeping them separate. Remaining changes: - Always clean up before reconfiguring interrupts to avoid memory leaks. - Always clean up when closing the device. - Use malloc()/free() instead of their rte_*() counterparts since there is no need to store the vector in huge pages-backed memory. - Allow more Rx queues than the size of the event file descriptor array as long as Rx interrupts are not requested on all of them. - Properly clean up interrupt handle when disabling Rx interrupts (nb_efd and intr_vec reset to 0). - Check completion channel presence while toggling Rx interrupts on a given queue. Fixes: 3c7d44af252a ("net/mlx5: support user space Rx interrupt event") Cc: stable@dpdk.org Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
2017-06-14 11:49:17 +00:00
/* Use invalid intr_vec[] index to disable entry. */
if (rte_intr_vec_list_index_set(intr_handle, i,
RTE_INTR_VEC_RXTX_OFFSET + RTE_MAX_RXTX_INTR_VEC_ID))
return -rte_errno;
net/mlx5: fix Rx interrupts management This commit addresses various issues that may lead to undefined behavior when configuring Rx interrupts. While failure to create a Rx queue completion channel in rxq_ctrl_setup() prevents that queue from being created, existing queues still have theirs. Since the error handler disables dev_conf.intr_conf.rxq as well, subsequent calls to rxq_ctrl_setup() create Rx queues without interrupts. This leads to a scenario where not all Rx queues support interrupts; missing checks on the presence of completion channels may crash the application. Considering that the PMD is not supposed to disable user-provided configuration parameters (dev_conf.intr_conf.rxq), and that these can change for subsequent rxq_ctrl_setup() calls anyway, properly supporting a mixed mode where not all Rx queues have interrupts enabled is a better approach. To do so with a minimum set of changes, priv_intr_efd_enable() and priv_create_intr_vec() are first refactored as a single priv_rx_intr_vec_enable() function (same for their "disable" counterparts). Since they had to be used together, there was no point in keeping them separate. Remaining changes: - Always clean up before reconfiguring interrupts to avoid memory leaks. - Always clean up when closing the device. - Use malloc()/free() instead of their rte_*() counterparts since there is no need to store the vector in huge pages-backed memory. - Allow more Rx queues than the size of the event file descriptor array as long as Rx interrupts are not requested on all of them. - Properly clean up interrupt handle when disabling Rx interrupts (nb_efd and intr_vec reset to 0). - Check completion channel presence while toggling Rx interrupts on a given queue. Fixes: 3c7d44af252a ("net/mlx5: support user space Rx interrupt event") Cc: stable@dpdk.org Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
2017-06-14 11:49:17 +00:00
continue;
}
mlx5_rxq_ref(dev, i);
net/mlx5: fix Rx interrupts management This commit addresses various issues that may lead to undefined behavior when configuring Rx interrupts. While failure to create a Rx queue completion channel in rxq_ctrl_setup() prevents that queue from being created, existing queues still have theirs. Since the error handler disables dev_conf.intr_conf.rxq as well, subsequent calls to rxq_ctrl_setup() create Rx queues without interrupts. This leads to a scenario where not all Rx queues support interrupts; missing checks on the presence of completion channels may crash the application. Considering that the PMD is not supposed to disable user-provided configuration parameters (dev_conf.intr_conf.rxq), and that these can change for subsequent rxq_ctrl_setup() calls anyway, properly supporting a mixed mode where not all Rx queues have interrupts enabled is a better approach. To do so with a minimum set of changes, priv_intr_efd_enable() and priv_create_intr_vec() are first refactored as a single priv_rx_intr_vec_enable() function (same for their "disable" counterparts). Since they had to be used together, there was no point in keeping them separate. Remaining changes: - Always clean up before reconfiguring interrupts to avoid memory leaks. - Always clean up when closing the device. - Use malloc()/free() instead of their rte_*() counterparts since there is no need to store the vector in huge pages-backed memory. - Allow more Rx queues than the size of the event file descriptor array as long as Rx interrupts are not requested on all of them. - Properly clean up interrupt handle when disabling Rx interrupts (nb_efd and intr_vec reset to 0). - Check completion channel presence while toggling Rx interrupts on a given queue. Fixes: 3c7d44af252a ("net/mlx5: support user space Rx interrupt event") Cc: stable@dpdk.org Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
2017-06-14 11:49:17 +00:00
if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
DRV_LOG(ERR,
"port %u too many Rx queues for interrupt"
" vector size (%d), Rx interrupts cannot be"
" enabled",
dev->data->port_id, RTE_MAX_RXTX_INTR_VEC_ID);
mlx5_rx_intr_vec_disable(dev);
rte_errno = ENOMEM;
return -rte_errno;
net/mlx5: fix Rx interrupts management This commit addresses various issues that may lead to undefined behavior when configuring Rx interrupts. While failure to create a Rx queue completion channel in rxq_ctrl_setup() prevents that queue from being created, existing queues still have theirs. Since the error handler disables dev_conf.intr_conf.rxq as well, subsequent calls to rxq_ctrl_setup() create Rx queues without interrupts. This leads to a scenario where not all Rx queues support interrupts; missing checks on the presence of completion channels may crash the application. Considering that the PMD is not supposed to disable user-provided configuration parameters (dev_conf.intr_conf.rxq), and that these can change for subsequent rxq_ctrl_setup() calls anyway, properly supporting a mixed mode where not all Rx queues have interrupts enabled is a better approach. To do so with a minimum set of changes, priv_intr_efd_enable() and priv_create_intr_vec() are first refactored as a single priv_rx_intr_vec_enable() function (same for their "disable" counterparts). Since they had to be used together, there was no point in keeping them separate. Remaining changes: - Always clean up before reconfiguring interrupts to avoid memory leaks. - Always clean up when closing the device. - Use malloc()/free() instead of their rte_*() counterparts since there is no need to store the vector in huge pages-backed memory. - Allow more Rx queues than the size of the event file descriptor array as long as Rx interrupts are not requested on all of them. - Properly clean up interrupt handle when disabling Rx interrupts (nb_efd and intr_vec reset to 0). - Check completion channel presence while toggling Rx interrupts on a given queue. Fixes: 3c7d44af252a ("net/mlx5: support user space Rx interrupt event") Cc: stable@dpdk.org Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
2017-06-14 11:49:17 +00:00
}
rc = mlx5_os_set_nonblock_channel_fd(rxq_obj->fd);
if (rc < 0) {
rte_errno = errno;
DRV_LOG(ERR,
"port %u failed to make Rx interrupt file"
" descriptor %d non-blocking for queue index"
" %d",
dev->data->port_id, rxq_obj->fd, i);
mlx5_rx_intr_vec_disable(dev);
return -rte_errno;
}
if (rte_intr_vec_list_index_set(intr_handle, i,
RTE_INTR_VEC_RXTX_OFFSET + count))
return -rte_errno;
if (rte_intr_efds_index_set(intr_handle, count,
rxq_obj->fd))
return -rte_errno;
net/mlx5: fix Rx interrupts management This commit addresses various issues that may lead to undefined behavior when configuring Rx interrupts. While failure to create a Rx queue completion channel in rxq_ctrl_setup() prevents that queue from being created, existing queues still have theirs. Since the error handler disables dev_conf.intr_conf.rxq as well, subsequent calls to rxq_ctrl_setup() create Rx queues without interrupts. This leads to a scenario where not all Rx queues support interrupts; missing checks on the presence of completion channels may crash the application. Considering that the PMD is not supposed to disable user-provided configuration parameters (dev_conf.intr_conf.rxq), and that these can change for subsequent rxq_ctrl_setup() calls anyway, properly supporting a mixed mode where not all Rx queues have interrupts enabled is a better approach. To do so with a minimum set of changes, priv_intr_efd_enable() and priv_create_intr_vec() are first refactored as a single priv_rx_intr_vec_enable() function (same for their "disable" counterparts). Since they had to be used together, there was no point in keeping them separate. Remaining changes: - Always clean up before reconfiguring interrupts to avoid memory leaks. - Always clean up when closing the device. - Use malloc()/free() instead of their rte_*() counterparts since there is no need to store the vector in huge pages-backed memory. - Allow more Rx queues than the size of the event file descriptor array as long as Rx interrupts are not requested on all of them. - Properly clean up interrupt handle when disabling Rx interrupts (nb_efd and intr_vec reset to 0). - Check completion channel presence while toggling Rx interrupts on a given queue. Fixes: 3c7d44af252a ("net/mlx5: support user space Rx interrupt event") Cc: stable@dpdk.org Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
2017-06-14 11:49:17 +00:00
count++;
}
net/mlx5: fix Rx interrupts management This commit addresses various issues that may lead to undefined behavior when configuring Rx interrupts. While failure to create a Rx queue completion channel in rxq_ctrl_setup() prevents that queue from being created, existing queues still have theirs. Since the error handler disables dev_conf.intr_conf.rxq as well, subsequent calls to rxq_ctrl_setup() create Rx queues without interrupts. This leads to a scenario where not all Rx queues support interrupts; missing checks on the presence of completion channels may crash the application. Considering that the PMD is not supposed to disable user-provided configuration parameters (dev_conf.intr_conf.rxq), and that these can change for subsequent rxq_ctrl_setup() calls anyway, properly supporting a mixed mode where not all Rx queues have interrupts enabled is a better approach. To do so with a minimum set of changes, priv_intr_efd_enable() and priv_create_intr_vec() are first refactored as a single priv_rx_intr_vec_enable() function (same for their "disable" counterparts). Since they had to be used together, there was no point in keeping them separate. Remaining changes: - Always clean up before reconfiguring interrupts to avoid memory leaks. - Always clean up when closing the device. - Use malloc()/free() instead of their rte_*() counterparts since there is no need to store the vector in huge pages-backed memory. - Allow more Rx queues than the size of the event file descriptor array as long as Rx interrupts are not requested on all of them. - Properly clean up interrupt handle when disabling Rx interrupts (nb_efd and intr_vec reset to 0). - Check completion channel presence while toggling Rx interrupts on a given queue. Fixes: 3c7d44af252a ("net/mlx5: support user space Rx interrupt event") Cc: stable@dpdk.org Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
2017-06-14 11:49:17 +00:00
if (!count)
mlx5_rx_intr_vec_disable(dev);
else if (rte_intr_nb_efd_set(intr_handle, count))
return -rte_errno;
return 0;
}
/**
net/mlx5: fix Rx interrupts management This commit addresses various issues that may lead to undefined behavior when configuring Rx interrupts. While failure to create a Rx queue completion channel in rxq_ctrl_setup() prevents that queue from being created, existing queues still have theirs. Since the error handler disables dev_conf.intr_conf.rxq as well, subsequent calls to rxq_ctrl_setup() create Rx queues without interrupts. This leads to a scenario where not all Rx queues support interrupts; missing checks on the presence of completion channels may crash the application. Considering that the PMD is not supposed to disable user-provided configuration parameters (dev_conf.intr_conf.rxq), and that these can change for subsequent rxq_ctrl_setup() calls anyway, properly supporting a mixed mode where not all Rx queues have interrupts enabled is a better approach. To do so with a minimum set of changes, priv_intr_efd_enable() and priv_create_intr_vec() are first refactored as a single priv_rx_intr_vec_enable() function (same for their "disable" counterparts). Since they had to be used together, there was no point in keeping them separate. Remaining changes: - Always clean up before reconfiguring interrupts to avoid memory leaks. - Always clean up when closing the device. - Use malloc()/free() instead of their rte_*() counterparts since there is no need to store the vector in huge pages-backed memory. - Allow more Rx queues than the size of the event file descriptor array as long as Rx interrupts are not requested on all of them. - Properly clean up interrupt handle when disabling Rx interrupts (nb_efd and intr_vec reset to 0). - Check completion channel presence while toggling Rx interrupts on a given queue. Fixes: 3c7d44af252a ("net/mlx5: support user space Rx interrupt event") Cc: stable@dpdk.org Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
2017-06-14 11:49:17 +00:00
* Clean up Rx interrupts handler.
*
* @param dev
* Pointer to Ethernet device.
*/
void
mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct rte_intr_handle *intr_handle = dev->intr_handle;
unsigned int i;
unsigned int rxqs_n = priv->rxqs_n;
unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
if (!dev->data->dev_conf.intr_conf.rxq)
return;
if (rte_intr_vec_list_index_get(intr_handle, 0) < 0)
goto free;
for (i = 0; i != n; ++i) {
if (rte_intr_vec_list_index_get(intr_handle, i) ==
RTE_INTR_VEC_RXTX_OFFSET + RTE_MAX_RXTX_INTR_VEC_ID)
continue;
/**
* Need to access directly the queue to release the reference
* kept in mlx5_rx_intr_vec_enable().
*/
mlx5_rxq_deref(dev, i);
}
free:
rte_intr_free_epoll_fd(intr_handle);
rte_intr_vec_list_free(intr_handle);
rte_intr_nb_efd_set(intr_handle, 0);
}
/**
* MLX5 CQ notification .
*
* @param rxq
* Pointer to receive queue structure.
* @param sq_n_rxq
* Sequence number per receive queue .
*/
static inline void
mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
{
int sq_n = 0;
uint32_t doorbell_hi;
uint64_t doorbell;
sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK);
doorbell = (uint64_t)doorbell_hi << 32;
doorbell |= rxq->cqn;
common/mlx5: fix post doorbell barrier The rdma-core library can map doorbell register in two ways, depending on the environment variable "MLX5_SHUT_UP_BF": - as regular cached memory, the variable is either missing or set to zero. This type of mapping may cause the significant doorbell register writing latency and requires an explicit memory write barrier to mitigate this issue and prevent write combining. - as non-cached memory, the variable is present and set to not "0" value. This type of mapping may cause performance impact under heavy loading conditions but the explicit write memory barrier is not required and it may improve core performance. The UAR creation function maps a doorbell in one of the above ways according to the system. In run time, it always adds an explicit memory barrier after writing to. In cases where the doorbell was mapped as non-cached memory, the explicit memory barrier is unnecessary and may impair performance. The commit [1] solved this problem for a Tx queue. In run time, it checks the mapping type and provides the memory barrier after writing to a Tx doorbell register if it is needed. The mapping type is extracted directly from the uar_mmap_offset field in the queue properties. This patch shares this code between the drivers and extends the above solution for each of them. [1] commit 8409a28573d3 ("net/mlx5: control transmit doorbell register mapping") Fixes: f8c97babc9f4 ("compress/mlx5: add data-path functions") Fixes: 8e196c08ab53 ("crypto/mlx5: support enqueue/dequeue operations") Fixes: 4d4e245ad637 ("regex/mlx5: support enqueue") Cc: stable@dpdk.org Signed-off-by: Michael Baum <michaelba@nvidia.com> Reviewed-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com> Acked-by: Matan Azrad <matan@nvidia.com>
2021-11-03 18:35:13 +00:00
mlx5_doorbell_ring(&rxq->uar_data, rte_cpu_to_be_64(doorbell),
doorbell_hi, &rxq->cq_db[MLX5_CQ_ARM_DB], 0);
}
/**
net/mlx5: fix Rx interrupts management This commit addresses various issues that may lead to undefined behavior when configuring Rx interrupts. While failure to create a Rx queue completion channel in rxq_ctrl_setup() prevents that queue from being created, existing queues still have theirs. Since the error handler disables dev_conf.intr_conf.rxq as well, subsequent calls to rxq_ctrl_setup() create Rx queues without interrupts. This leads to a scenario where not all Rx queues support interrupts; missing checks on the presence of completion channels may crash the application. Considering that the PMD is not supposed to disable user-provided configuration parameters (dev_conf.intr_conf.rxq), and that these can change for subsequent rxq_ctrl_setup() calls anyway, properly supporting a mixed mode where not all Rx queues have interrupts enabled is a better approach. To do so with a minimum set of changes, priv_intr_efd_enable() and priv_create_intr_vec() are first refactored as a single priv_rx_intr_vec_enable() function (same for their "disable" counterparts). Since they had to be used together, there was no point in keeping them separate. Remaining changes: - Always clean up before reconfiguring interrupts to avoid memory leaks. - Always clean up when closing the device. - Use malloc()/free() instead of their rte_*() counterparts since there is no need to store the vector in huge pages-backed memory. - Allow more Rx queues than the size of the event file descriptor array as long as Rx interrupts are not requested on all of them. - Properly clean up interrupt handle when disabling Rx interrupts (nb_efd and intr_vec reset to 0). - Check completion channel presence while toggling Rx interrupts on a given queue. Fixes: 3c7d44af252a ("net/mlx5: support user space Rx interrupt event") Cc: stable@dpdk.org Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
2017-06-14 11:49:17 +00:00
* DPDK callback for Rx queue interrupt enable.
*
* @param dev
* Pointer to Ethernet device structure.
* @param rx_queue_id
net/mlx5: fix Rx interrupts management This commit addresses various issues that may lead to undefined behavior when configuring Rx interrupts. While failure to create a Rx queue completion channel in rxq_ctrl_setup() prevents that queue from being created, existing queues still have theirs. Since the error handler disables dev_conf.intr_conf.rxq as well, subsequent calls to rxq_ctrl_setup() create Rx queues without interrupts. This leads to a scenario where not all Rx queues support interrupts; missing checks on the presence of completion channels may crash the application. Considering that the PMD is not supposed to disable user-provided configuration parameters (dev_conf.intr_conf.rxq), and that these can change for subsequent rxq_ctrl_setup() calls anyway, properly supporting a mixed mode where not all Rx queues have interrupts enabled is a better approach. To do so with a minimum set of changes, priv_intr_efd_enable() and priv_create_intr_vec() are first refactored as a single priv_rx_intr_vec_enable() function (same for their "disable" counterparts). Since they had to be used together, there was no point in keeping them separate. Remaining changes: - Always clean up before reconfiguring interrupts to avoid memory leaks. - Always clean up when closing the device. - Use malloc()/free() instead of their rte_*() counterparts since there is no need to store the vector in huge pages-backed memory. - Allow more Rx queues than the size of the event file descriptor array as long as Rx interrupts are not requested on all of them. - Properly clean up interrupt handle when disabling Rx interrupts (nb_efd and intr_vec reset to 0). - Check completion channel presence while toggling Rx interrupts on a given queue. Fixes: 3c7d44af252a ("net/mlx5: support user space Rx interrupt event") Cc: stable@dpdk.org Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
2017-06-14 11:49:17 +00:00
* Rx queue number.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, rx_queue_id);
if (!rxq)
goto error;
if (rxq->ctrl->irq) {
if (!rxq->ctrl->obj)
goto error;
mlx5_arm_cq(&rxq->ctrl->rxq, rxq->ctrl->rxq.cq_arm_sn);
net/mlx5: fix Rx interrupts management This commit addresses various issues that may lead to undefined behavior when configuring Rx interrupts. While failure to create a Rx queue completion channel in rxq_ctrl_setup() prevents that queue from being created, existing queues still have theirs. Since the error handler disables dev_conf.intr_conf.rxq as well, subsequent calls to rxq_ctrl_setup() create Rx queues without interrupts. This leads to a scenario where not all Rx queues support interrupts; missing checks on the presence of completion channels may crash the application. Considering that the PMD is not supposed to disable user-provided configuration parameters (dev_conf.intr_conf.rxq), and that these can change for subsequent rxq_ctrl_setup() calls anyway, properly supporting a mixed mode where not all Rx queues have interrupts enabled is a better approach. To do so with a minimum set of changes, priv_intr_efd_enable() and priv_create_intr_vec() are first refactored as a single priv_rx_intr_vec_enable() function (same for their "disable" counterparts). Since they had to be used together, there was no point in keeping them separate. Remaining changes: - Always clean up before reconfiguring interrupts to avoid memory leaks. - Always clean up when closing the device. - Use malloc()/free() instead of their rte_*() counterparts since there is no need to store the vector in huge pages-backed memory. - Allow more Rx queues than the size of the event file descriptor array as long as Rx interrupts are not requested on all of them. - Properly clean up interrupt handle when disabling Rx interrupts (nb_efd and intr_vec reset to 0). - Check completion channel presence while toggling Rx interrupts on a given queue. Fixes: 3c7d44af252a ("net/mlx5: support user space Rx interrupt event") Cc: stable@dpdk.org Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
2017-06-14 11:49:17 +00:00
}
return 0;
error:
rte_errno = EINVAL;
return -rte_errno;
}
/**
net/mlx5: fix Rx interrupts management This commit addresses various issues that may lead to undefined behavior when configuring Rx interrupts. While failure to create a Rx queue completion channel in rxq_ctrl_setup() prevents that queue from being created, existing queues still have theirs. Since the error handler disables dev_conf.intr_conf.rxq as well, subsequent calls to rxq_ctrl_setup() create Rx queues without interrupts. This leads to a scenario where not all Rx queues support interrupts; missing checks on the presence of completion channels may crash the application. Considering that the PMD is not supposed to disable user-provided configuration parameters (dev_conf.intr_conf.rxq), and that these can change for subsequent rxq_ctrl_setup() calls anyway, properly supporting a mixed mode where not all Rx queues have interrupts enabled is a better approach. To do so with a minimum set of changes, priv_intr_efd_enable() and priv_create_intr_vec() are first refactored as a single priv_rx_intr_vec_enable() function (same for their "disable" counterparts). Since they had to be used together, there was no point in keeping them separate. Remaining changes: - Always clean up before reconfiguring interrupts to avoid memory leaks. - Always clean up when closing the device. - Use malloc()/free() instead of their rte_*() counterparts since there is no need to store the vector in huge pages-backed memory. - Allow more Rx queues than the size of the event file descriptor array as long as Rx interrupts are not requested on all of them. - Properly clean up interrupt handle when disabling Rx interrupts (nb_efd and intr_vec reset to 0). - Check completion channel presence while toggling Rx interrupts on a given queue. Fixes: 3c7d44af252a ("net/mlx5: support user space Rx interrupt event") Cc: stable@dpdk.org Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
2017-06-14 11:49:17 +00:00
* DPDK callback for Rx queue interrupt disable.
*
* @param dev
* Pointer to Ethernet device structure.
* @param rx_queue_id
net/mlx5: fix Rx interrupts management This commit addresses various issues that may lead to undefined behavior when configuring Rx interrupts. While failure to create a Rx queue completion channel in rxq_ctrl_setup() prevents that queue from being created, existing queues still have theirs. Since the error handler disables dev_conf.intr_conf.rxq as well, subsequent calls to rxq_ctrl_setup() create Rx queues without interrupts. This leads to a scenario where not all Rx queues support interrupts; missing checks on the presence of completion channels may crash the application. Considering that the PMD is not supposed to disable user-provided configuration parameters (dev_conf.intr_conf.rxq), and that these can change for subsequent rxq_ctrl_setup() calls anyway, properly supporting a mixed mode where not all Rx queues have interrupts enabled is a better approach. To do so with a minimum set of changes, priv_intr_efd_enable() and priv_create_intr_vec() are first refactored as a single priv_rx_intr_vec_enable() function (same for their "disable" counterparts). Since they had to be used together, there was no point in keeping them separate. Remaining changes: - Always clean up before reconfiguring interrupts to avoid memory leaks. - Always clean up when closing the device. - Use malloc()/free() instead of their rte_*() counterparts since there is no need to store the vector in huge pages-backed memory. - Allow more Rx queues than the size of the event file descriptor array as long as Rx interrupts are not requested on all of them. - Properly clean up interrupt handle when disabling Rx interrupts (nb_efd and intr_vec reset to 0). - Check completion channel presence while toggling Rx interrupts on a given queue. Fixes: 3c7d44af252a ("net/mlx5: support user space Rx interrupt event") Cc: stable@dpdk.org Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
2017-06-14 11:49:17 +00:00
* Rx queue number.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, rx_queue_id);
int ret = 0;
if (!rxq) {
rte_errno = EINVAL;
return -rte_errno;
}
if (!rxq->ctrl->obj)
goto error;
if (rxq->ctrl->irq) {
ret = priv->obj_ops.rxq_event_get(rxq->ctrl->obj);
if (ret < 0)
goto error;
rxq->ctrl->rxq.cq_arm_sn++;
net/mlx5: fix Rx interrupts management This commit addresses various issues that may lead to undefined behavior when configuring Rx interrupts. While failure to create a Rx queue completion channel in rxq_ctrl_setup() prevents that queue from being created, existing queues still have theirs. Since the error handler disables dev_conf.intr_conf.rxq as well, subsequent calls to rxq_ctrl_setup() create Rx queues without interrupts. This leads to a scenario where not all Rx queues support interrupts; missing checks on the presence of completion channels may crash the application. Considering that the PMD is not supposed to disable user-provided configuration parameters (dev_conf.intr_conf.rxq), and that these can change for subsequent rxq_ctrl_setup() calls anyway, properly supporting a mixed mode where not all Rx queues have interrupts enabled is a better approach. To do so with a minimum set of changes, priv_intr_efd_enable() and priv_create_intr_vec() are first refactored as a single priv_rx_intr_vec_enable() function (same for their "disable" counterparts). Since they had to be used together, there was no point in keeping them separate. Remaining changes: - Always clean up before reconfiguring interrupts to avoid memory leaks. - Always clean up when closing the device. - Use malloc()/free() instead of their rte_*() counterparts since there is no need to store the vector in huge pages-backed memory. - Allow more Rx queues than the size of the event file descriptor array as long as Rx interrupts are not requested on all of them. - Properly clean up interrupt handle when disabling Rx interrupts (nb_efd and intr_vec reset to 0). - Check completion channel presence while toggling Rx interrupts on a given queue. Fixes: 3c7d44af252a ("net/mlx5: support user space Rx interrupt event") Cc: stable@dpdk.org Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
2017-06-14 11:49:17 +00:00
}
return 0;
error:
/**
* The ret variable may be EAGAIN which means the get_event function was
* called before receiving one.
*/
if (ret < 0)
rte_errno = errno;
else
rte_errno = EINVAL;
if (rte_errno != EAGAIN)
DRV_LOG(WARNING, "port %u unable to disable interrupt on Rx queue %d",
dev->data->port_id, rx_queue_id);
return -rte_errno;
}
/**
* Verify the Rx queue objects list is empty
*
* @param dev
* Pointer to Ethernet device.
*
* @return
* The number of objects not released.
*/
int
mlx5_rxq_obj_verify(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
int ret = 0;
struct mlx5_rxq_obj *rxq_obj;
LIST_FOREACH(rxq_obj, &priv->rxqsobj, next) {
if (rxq_obj->rxq_ctrl == NULL)
continue;
if (rxq_obj->rxq_ctrl->rxq.shared &&
!LIST_EMPTY(&rxq_obj->rxq_ctrl->owners))
continue;
DRV_LOG(DEBUG, "port %u Rx queue %u still referenced",
dev->data->port_id, rxq_obj->rxq_ctrl->rxq.idx);
++ret;
}
return ret;
}
/**
* Callback function to initialize mbufs for Multi-Packet RQ.
*/
static inline void
mlx5_mprq_buf_init(struct rte_mempool *mp, void *opaque_arg,
void *_m, unsigned int i __rte_unused)
{
struct mlx5_mprq_buf *buf = _m;
struct rte_mbuf_ext_shared_info *shinfo;
unsigned int strd_n = (unsigned int)(uintptr_t)opaque_arg;
unsigned int j;
memset(_m, 0, sizeof(*buf));
buf->mp = mp;
__atomic_store_n(&buf->refcnt, 1, __ATOMIC_RELAXED);
for (j = 0; j != strd_n; ++j) {
shinfo = &buf->shinfos[j];
shinfo->free_cb = mlx5_mprq_buf_free_cb;
shinfo->fcb_opaque = buf;
}
}
/**
* Free mempool of Multi-Packet RQ.
*
* @param dev
* Pointer to Ethernet device.
*
* @return
* 0 on success, negative errno value on failure.
*/
int
mlx5_mprq_free_mp(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct rte_mempool *mp = priv->mprq_mp;
unsigned int i;
if (mp == NULL)
return 0;
DRV_LOG(DEBUG, "port %u freeing mempool (%s) for Multi-Packet RQ",
dev->data->port_id, mp->name);
/*
* If a buffer in the pool has been externally attached to a mbuf and it
* is still in use by application, destroying the Rx queue can spoil
* the packet. It is unlikely to happen but if application dynamically
* creates and destroys with holding Rx packets, this can happen.
*
* TODO: It is unavoidable for now because the mempool for Multi-Packet
* RQ isn't provided by application but managed by PMD.
*/
if (!rte_mempool_full(mp)) {
DRV_LOG(ERR,
"port %u mempool for Multi-Packet RQ is still in use",
dev->data->port_id);
rte_errno = EBUSY;
return -rte_errno;
}
rte_mempool_free(mp);
/* Unset mempool for each Rx queue. */
for (i = 0; i != priv->rxqs_n; ++i) {
struct mlx5_rxq_data *rxq = mlx5_rxq_data_get(dev, i);
if (rxq == NULL)
continue;
rxq->mprq_mp = NULL;
}
priv->mprq_mp = NULL;
return 0;
}
/**
* Allocate a mempool for Multi-Packet RQ. All configured Rx queues share the
* mempool. If already allocated, reuse it if there're enough elements.
* Otherwise, resize it.
*
* @param dev
* Pointer to Ethernet device.
*
* @return
* 0 on success, negative errno value on failure.
*/
int
mlx5_mprq_alloc_mp(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct rte_mempool *mp = priv->mprq_mp;
char name[RTE_MEMPOOL_NAMESIZE];
unsigned int desc = 0;
unsigned int buf_len;
unsigned int obj_num;
unsigned int obj_size;
unsigned int log_strd_num = 0;
unsigned int log_strd_sz = 0;
unsigned int i;
unsigned int n_ibv = 0;
int ret;
if (!mlx5_mprq_enabled(dev))
return 0;
/* Count the total number of descriptors configured. */
for (i = 0; i != priv->rxqs_n; ++i) {
struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_ctrl_get(dev, i);
struct mlx5_rxq_data *rxq;
if (rxq_ctrl == NULL || rxq_ctrl->is_hairpin)
continue;
rxq = &rxq_ctrl->rxq;
n_ibv++;
desc += 1 << rxq->elts_n;
/* Get the max number of strides. */
if (log_strd_num < rxq->log_strd_num)
log_strd_num = rxq->log_strd_num;
/* Get the max size of a stride. */
if (log_strd_sz < rxq->log_strd_sz)
log_strd_sz = rxq->log_strd_sz;
}
MLX5_ASSERT(log_strd_num && log_strd_sz);
buf_len = RTE_BIT32(log_strd_num) * RTE_BIT32(log_strd_sz);
obj_size = sizeof(struct mlx5_mprq_buf) + buf_len +
RTE_BIT32(log_strd_num) *
sizeof(struct rte_mbuf_ext_shared_info) +
RTE_PKTMBUF_HEADROOM;
/*
* Received packets can be either memcpy'd or externally referenced. In
* case that the packet is attached to an mbuf as an external buffer, as
* it isn't possible to predict how the buffers will be queued by
* application, there's no option to exactly pre-allocate needed buffers
* in advance but to speculatively prepares enough buffers.
*
* In the data path, if this Mempool is depleted, PMD will try to memcpy
* received packets to buffers provided by application (rxq->mp) until
* this Mempool gets available again.
*/
desc *= 4;
obj_num = desc + MLX5_MPRQ_MP_CACHE_SZ * n_ibv;
/*
* rte_mempool_create_empty() has sanity check to refuse large cache
* size compared to the number of elements.
* CACHE_FLUSHTHRESH_MULTIPLIER is defined in a C file, so using a
* constant number 2 instead.
*/
obj_num = RTE_MAX(obj_num, MLX5_MPRQ_MP_CACHE_SZ * 2);
/* Check a mempool is already allocated and if it can be resued. */
if (mp != NULL && mp->elt_size >= obj_size && mp->size >= obj_num) {
DRV_LOG(DEBUG, "port %u mempool %s is being reused",
dev->data->port_id, mp->name);
/* Reuse. */
goto exit;
} else if (mp != NULL) {
DRV_LOG(DEBUG, "port %u mempool %s should be resized, freeing it",
dev->data->port_id, mp->name);
/*
* If failed to free, which means it may be still in use, no way
* but to keep using the existing one. On buffer underrun,
* packets will be memcpy'd instead of external buffer
* attachment.
*/
if (mlx5_mprq_free_mp(dev)) {
if (mp->elt_size >= obj_size)
goto exit;
else
return -rte_errno;
}
}
snprintf(name, sizeof(name), "port-%u-mprq", dev->data->port_id);
mp = rte_mempool_create(name, obj_num, obj_size, MLX5_MPRQ_MP_CACHE_SZ,
0, NULL, NULL, mlx5_mprq_buf_init,
(void *)((uintptr_t)1 << log_strd_num),
dev->device->numa_node, 0);
if (mp == NULL) {
DRV_LOG(ERR,
"port %u failed to allocate a mempool for"
" Multi-Packet RQ, count=%u, size=%u",
dev->data->port_id, obj_num, obj_size);
rte_errno = ENOMEM;
return -rte_errno;
}
common/mlx5: fix mempool registration Mempool registration was not correctly processing mempools with RTE_PKTMBUF_F_PINEND_EXT_BUF flag set ("pinned mempools" for short), because it is not known at registration time whether the mempool is a pktmbuf one, and its elements may not yet be initialized to analyze them. Attempts had been made to recognize such pools, but there was no robust solution, only the owner of a mempool (the application or a device) knows its type. This patch extends common/mlx5 registration code to accept a hint that the mempool is a pinned one and uses this capability from net/mlx5 driver. 1. Remove all code assuming pktmbuf pool type or trying to recognize the type of a pool. 2. Register pinned mempools used for Rx and their external memory on port start. Populate the MR cache with all their MRs. 3. Change Tx slow path logic as follows: 3.1. Search the mempool database for a memory region (MR) by the mbuf pool and its buffer address. 3.2. If not MR for the address is found for the mempool, and the mempool contains only pinned external buffers, perform the mempool registration of the mempool and its external pinned memory. 3.3. Fall back to using page-based MRs in other cases (for example, a buffer with externally attached memory, but not from a pinned mempool). Fixes: 690b2a88c2f7 ("common/mlx5: add mempool registration facilities") Fixes: fec28ca0e3a9 ("net/mlx5: support mempool registration") Signed-off-by: Dmitry Kozlyuk <dkozlyuk@nvidia.com> Reviewed-by: Matan Azrad <matan@nvidia.com> Reviewed-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
2021-11-19 14:31:56 +00:00
ret = mlx5_mr_mempool_register(priv->sh->cdev, mp, false);
if (ret < 0 && rte_errno != EEXIST) {
ret = rte_errno;
DRV_LOG(ERR, "port %u failed to register a mempool for Multi-Packet RQ",
dev->data->port_id);
rte_mempool_free(mp);
rte_errno = ret;
return -rte_errno;
}
priv->mprq_mp = mp;
exit:
/* Set mempool for each Rx queue. */
for (i = 0; i != priv->rxqs_n; ++i) {
struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_ctrl_get(dev, i);
if (rxq_ctrl == NULL || rxq_ctrl->is_hairpin)
continue;
rxq_ctrl->rxq.mprq_mp = mp;
}
DRV_LOG(INFO, "port %u Multi-Packet RQ is configured",
dev->data->port_id);
return 0;
}
#define MLX5_MAX_TCP_HDR_OFFSET ((unsigned int)(sizeof(struct rte_ether_hdr) + \
sizeof(struct rte_vlan_hdr) * 2 + \
sizeof(struct rte_ipv6_hdr)))
#define MAX_TCP_OPTION_SIZE 40u
#define MLX5_MAX_LRO_HEADER_FIX ((unsigned int)(MLX5_MAX_TCP_HDR_OFFSET + \
sizeof(struct rte_tcp_hdr) + \
MAX_TCP_OPTION_SIZE))
/**
* Adjust the maximum LRO massage size.
*
* @param dev
* Pointer to Ethernet device.
* @param idx
* RX queue index.
* @param max_lro_size
* The maximum size for LRO packet.
*/
static void
mlx5_max_lro_msg_size_adjust(struct rte_eth_dev *dev, uint16_t idx,
uint32_t max_lro_size)
{
struct mlx5_priv *priv = dev->data->dev_private;
if (priv->sh->cdev->config.hca_attr.lro_max_msg_sz_mode ==
MLX5_LRO_MAX_MSG_SIZE_START_FROM_L4 && max_lro_size >
MLX5_MAX_TCP_HDR_OFFSET)
max_lro_size -= MLX5_MAX_TCP_HDR_OFFSET;
max_lro_size = RTE_MIN(max_lro_size, MLX5_MAX_LRO_SIZE);
MLX5_ASSERT(max_lro_size >= MLX5_LRO_SEG_CHUNK_SIZE);
max_lro_size /= MLX5_LRO_SEG_CHUNK_SIZE;
if (priv->max_lro_msg_size)
priv->max_lro_msg_size =
RTE_MIN((uint32_t)priv->max_lro_msg_size, max_lro_size);
else
priv->max_lro_msg_size = max_lro_size;
DRV_LOG(DEBUG,
"port %u Rx Queue %u max LRO message size adjusted to %u bytes",
dev->data->port_id, idx,
priv->max_lro_msg_size * MLX5_LRO_SEG_CHUNK_SIZE);
}
/**
* Prepare both size and number of stride for Multi-Packet RQ.
*
* @param dev
* Pointer to Ethernet device.
* @param idx
* RX queue index.
* @param desc
* Number of descriptors to configure in queue.
* @param rx_seg_en
* Indicator if Rx segment enables, if so Multi-Packet RQ doesn't enable.
* @param min_mbuf_size
* Non scatter min mbuf size, max_rx_pktlen plus overhead.
* @param actual_log_stride_num
* Log number of strides to configure for this queue.
* @param actual_log_stride_size
* Log stride size to configure for this queue.
* @param is_extmem
* Is external pinned memory pool used.
* @return
* 0 if Multi-Packet RQ is supported, otherwise -1.
*/
static int
mlx5_mprq_prepare(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
bool rx_seg_en, uint32_t min_mbuf_size,
uint32_t *actual_log_stride_num,
uint32_t *actual_log_stride_size,
bool is_extmem)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_port_config *config = &priv->config;
struct mlx5_dev_cap *dev_cap = &priv->sh->dev_cap;
uint32_t log_min_stride_num = dev_cap->mprq.log_min_stride_num;
uint32_t log_max_stride_num = dev_cap->mprq.log_max_stride_num;
uint32_t log_def_stride_num =
RTE_MIN(RTE_MAX(MLX5_MPRQ_DEFAULT_LOG_STRIDE_NUM,
log_min_stride_num),
log_max_stride_num);
uint32_t log_min_stride_size = dev_cap->mprq.log_min_stride_size;
uint32_t log_max_stride_size = dev_cap->mprq.log_max_stride_size;
uint32_t log_def_stride_size =
RTE_MIN(RTE_MAX(MLX5_MPRQ_DEFAULT_LOG_STRIDE_SIZE,
log_min_stride_size),
log_max_stride_size);
uint32_t log_stride_wqe_size;
if (mlx5_check_mprq_support(dev) != 1 || rx_seg_en || is_extmem)
goto unsupport;
/* Checks if chosen number of strides is in supported range. */
if (config->mprq.log_stride_num > log_max_stride_num ||
config->mprq.log_stride_num < log_min_stride_num) {
*actual_log_stride_num = log_def_stride_num;
DRV_LOG(WARNING,
"Port %u Rx queue %u number of strides for Multi-Packet RQ is out of range, setting default value (%u)",
dev->data->port_id, idx, RTE_BIT32(log_def_stride_num));
} else {
*actual_log_stride_num = config->mprq.log_stride_num;
}
if (config->mprq.log_stride_size) {
/* Checks if chosen size of stride is in supported range. */
if (config->mprq.log_stride_size > log_max_stride_size ||
config->mprq.log_stride_size < log_min_stride_size) {
*actual_log_stride_size = log_def_stride_size;
DRV_LOG(WARNING,
"Port %u Rx queue %u size of a stride for Multi-Packet RQ is out of range, setting default value (%u)",
dev->data->port_id, idx,
RTE_BIT32(log_def_stride_size));
} else {
*actual_log_stride_size = config->mprq.log_stride_size;
}
} else {
if (min_mbuf_size <= RTE_BIT32(log_max_stride_size))
*actual_log_stride_size = log2above(min_mbuf_size);
else
goto unsupport;
}
log_stride_wqe_size = *actual_log_stride_num + *actual_log_stride_size;
/* Check if WQE buffer size is supported by hardware. */
if (log_stride_wqe_size < dev_cap->mprq.log_min_stride_wqe_size) {
*actual_log_stride_num = log_def_stride_num;
*actual_log_stride_size = log_def_stride_size;
DRV_LOG(WARNING,
"Port %u Rx queue %u size of WQE buffer for Multi-Packet RQ is too small, setting default values (stride_num_n=%u, stride_size_n=%u)",
dev->data->port_id, idx, RTE_BIT32(log_def_stride_num),
RTE_BIT32(log_def_stride_size));
log_stride_wqe_size = log_def_stride_num + log_def_stride_size;
}
MLX5_ASSERT(log_stride_wqe_size >=
dev_cap->mprq.log_min_stride_wqe_size);
if (desc <= RTE_BIT32(*actual_log_stride_num))
goto unsupport;
if (min_mbuf_size > RTE_BIT32(log_stride_wqe_size)) {
DRV_LOG(WARNING, "Port %u Rx queue %u "
"Multi-Packet RQ is unsupported, WQE buffer size (%u) "
"is smaller than min mbuf size (%u)",
dev->data->port_id, idx, RTE_BIT32(log_stride_wqe_size),
min_mbuf_size);
goto unsupport;
}
DRV_LOG(DEBUG, "Port %u Rx queue %u "
"Multi-Packet RQ is enabled strd_num_n = %u, strd_sz_n = %u",
dev->data->port_id, idx, RTE_BIT32(*actual_log_stride_num),
RTE_BIT32(*actual_log_stride_size));
return 0;
unsupport:
if (config->mprq.enabled)
DRV_LOG(WARNING,
"Port %u MPRQ is requested but cannot be enabled\n"
" (requested: pkt_sz = %u, desc_num = %u,"
" rxq_num = %u, stride_sz = %u, stride_num = %u\n"
" supported: min_rxqs_num = %u, min_buf_wqe_sz = %u"
" min_stride_sz = %u, max_stride_sz = %u).\n"
"Rx segment is %senabled. External mempool is %sused.",
dev->data->port_id, min_mbuf_size, desc, priv->rxqs_n,
RTE_BIT32(config->mprq.log_stride_size),
RTE_BIT32(config->mprq.log_stride_num),
config->mprq.min_rxqs_num,
RTE_BIT32(dev_cap->mprq.log_min_stride_wqe_size),
RTE_BIT32(dev_cap->mprq.log_min_stride_size),
RTE_BIT32(dev_cap->mprq.log_max_stride_size),
rx_seg_en ? "" : "not ", is_extmem ? "" : "not ");
return -1;
}
/**
* Create a DPDK Rx queue.
*
* @param dev
* Pointer to Ethernet device.
* @param idx
* RX queue index.
* @param desc
* Number of descriptors to configure in queue.
* @param socket
* NUMA socket on which memory must be allocated.
*
* @return
* A DPDK queue object on success, NULL otherwise and rte_errno is set.
*/
struct mlx5_rxq_ctrl *
mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
unsigned int socket, const struct rte_eth_rxconf *conf,
const struct rte_eth_rxseg_split *rx_seg, uint16_t n_seg,
bool is_extmem)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_rxq_ctrl *tmpl;
unsigned int mb_len = rte_pktmbuf_data_room_size(rx_seg[0].mp);
struct mlx5_port_config *config = &priv->config;
ethdev: new Rx/Tx offloads API This patch check if a input requested offloading is valid or not. Any reuqested offloading must be supported in the device capabilities. Any offloading is disabled by default if it is not set in the parameter dev_conf->[rt]xmode.offloads to rte_eth_dev_configure() and [rt]x_conf->offloads to rte_eth_[rt]x_queue_setup(). If any offloading is enabled in rte_eth_dev_configure() by application, it is enabled on all queues no matter whether it is per-queue or per-port type and no matter whether it is set or cleared in [rt]x_conf->offloads to rte_eth_[rt]x_queue_setup(). If a per-queue offloading hasn't be enabled in rte_eth_dev_configure(), it can be enabled or disabled for individual queue in ret_eth_[rt]x_queue_setup(). A new added offloading is the one which hasn't been enabled in rte_eth_dev_configure() and is reuqested to be enabled in rte_eth_[rt]x_queue_setup(), it must be per-queue type, otherwise trigger an error log. The underlying PMD must be aware that the requested offloadings to PMD specific queue_setup() function only carries those new added offloadings of per-queue type. This patch can make above such checking in a common way in rte_ethdev layer to avoid same checking in underlying PMD. This patch assumes that all PMDs in 18.05-rc2 have already converted to offload API defined in 17.11 . It also assumes that all PMDs can return correct offloading capabilities in rte_eth_dev_infos_get(). In the beginning of [rt]x_queue_setup() of underlying PMD, add offloads = [rt]xconf->offloads | dev->data->dev_conf.[rt]xmode.offloads; to keep same as offload API defined in 17.11 to avoid upper application broken due to offload API change. PMD can use the info that input [rt]xconf->offloads only carry the new added per-queue offloads to do some optimization or some code change on base of this patch. Signed-off-by: Wei Dai <wei.dai@intel.com> Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com> Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
2018-05-10 11:56:55 +00:00
uint64_t offloads = conf->offloads |
dev->data->dev_conf.rxmode.offloads;
unsigned int lro_on_queue = !!(offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO);
ethdev: fix max Rx packet length There is a confusion on setting max Rx packet length, this patch aims to clarify it. 'rte_eth_dev_configure()' API accepts max Rx packet size via 'uint32_t max_rx_pkt_len' field of the config struct 'struct rte_eth_conf'. Also 'rte_eth_dev_set_mtu()' API can be used to set the MTU, and result stored into '(struct rte_eth_dev)->data->mtu'. These two APIs are related but they work in a disconnected way, they store the set values in different variables which makes hard to figure out which one to use, also having two different method for a related functionality is confusing for the users. Other issues causing confusion is: * maximum transmission unit (MTU) is payload of the Ethernet frame. And 'max_rx_pkt_len' is the size of the Ethernet frame. Difference is Ethernet frame overhead, and this overhead may be different from device to device based on what device supports, like VLAN and QinQ. * 'max_rx_pkt_len' is only valid when application requested jumbo frame, which adds additional confusion and some APIs and PMDs already discards this documented behavior. * For the jumbo frame enabled case, 'max_rx_pkt_len' is an mandatory field, this adds configuration complexity for application. As solution, both APIs gets MTU as parameter, and both saves the result in same variable '(struct rte_eth_dev)->data->mtu'. For this 'max_rx_pkt_len' updated as 'mtu', and it is always valid independent from jumbo frame. For 'rte_eth_dev_configure()', 'dev->data->dev_conf.rxmode.mtu' is user request and it should be used only within configure function and result should be stored to '(struct rte_eth_dev)->data->mtu'. After that point both application and PMD uses MTU from this variable. When application doesn't provide an MTU during 'rte_eth_dev_configure()' default 'RTE_ETHER_MTU' value is used. Additional clarification done on scattered Rx configuration, in relation to MTU and Rx buffer size. MTU is used to configure the device for physical Rx/Tx size limitation, Rx buffer is where to store Rx packets, many PMDs use mbuf data buffer size as Rx buffer size. PMDs compare MTU against Rx buffer size to decide enabling scattered Rx or not. If scattered Rx is not supported by device, MTU bigger than Rx buffer size should fail. Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com> Acked-by: Ajit Khaparde <ajit.khaparde@broadcom.com> Acked-by: Somnath Kotur <somnath.kotur@broadcom.com> Acked-by: Huisong Li <lihuisong@huawei.com> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com> Acked-by: Rosen Xu <rosen.xu@intel.com> Acked-by: Hyong Youb Kim <hyonkim@cisco.com>
2021-10-18 13:48:48 +00:00
unsigned int max_rx_pktlen = lro_on_queue ?
dev->data->dev_conf.rxmode.max_lro_pkt_size :
ethdev: fix max Rx packet length There is a confusion on setting max Rx packet length, this patch aims to clarify it. 'rte_eth_dev_configure()' API accepts max Rx packet size via 'uint32_t max_rx_pkt_len' field of the config struct 'struct rte_eth_conf'. Also 'rte_eth_dev_set_mtu()' API can be used to set the MTU, and result stored into '(struct rte_eth_dev)->data->mtu'. These two APIs are related but they work in a disconnected way, they store the set values in different variables which makes hard to figure out which one to use, also having two different method for a related functionality is confusing for the users. Other issues causing confusion is: * maximum transmission unit (MTU) is payload of the Ethernet frame. And 'max_rx_pkt_len' is the size of the Ethernet frame. Difference is Ethernet frame overhead, and this overhead may be different from device to device based on what device supports, like VLAN and QinQ. * 'max_rx_pkt_len' is only valid when application requested jumbo frame, which adds additional confusion and some APIs and PMDs already discards this documented behavior. * For the jumbo frame enabled case, 'max_rx_pkt_len' is an mandatory field, this adds configuration complexity for application. As solution, both APIs gets MTU as parameter, and both saves the result in same variable '(struct rte_eth_dev)->data->mtu'. For this 'max_rx_pkt_len' updated as 'mtu', and it is always valid independent from jumbo frame. For 'rte_eth_dev_configure()', 'dev->data->dev_conf.rxmode.mtu' is user request and it should be used only within configure function and result should be stored to '(struct rte_eth_dev)->data->mtu'. After that point both application and PMD uses MTU from this variable. When application doesn't provide an MTU during 'rte_eth_dev_configure()' default 'RTE_ETHER_MTU' value is used. Additional clarification done on scattered Rx configuration, in relation to MTU and Rx buffer size. MTU is used to configure the device for physical Rx/Tx size limitation, Rx buffer is where to store Rx packets, many PMDs use mbuf data buffer size as Rx buffer size. PMDs compare MTU against Rx buffer size to decide enabling scattered Rx or not. If scattered Rx is not supported by device, MTU bigger than Rx buffer size should fail. Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com> Acked-by: Ajit Khaparde <ajit.khaparde@broadcom.com> Acked-by: Somnath Kotur <somnath.kotur@broadcom.com> Acked-by: Huisong Li <lihuisong@huawei.com> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com> Acked-by: Rosen Xu <rosen.xu@intel.com> Acked-by: Hyong Youb Kim <hyonkim@cisco.com>
2021-10-18 13:48:48 +00:00
dev->data->mtu + (unsigned int)RTE_ETHER_HDR_LEN +
RTE_ETHER_CRC_LEN;
unsigned int non_scatter_min_mbuf_size = max_rx_pktlen +
RTE_PKTMBUF_HEADROOM;
unsigned int max_lro_size = 0;
unsigned int first_mb_free_size = mb_len - RTE_PKTMBUF_HEADROOM;
uint32_t mprq_log_actual_stride_num = 0;
uint32_t mprq_log_actual_stride_size = 0;
bool rx_seg_en = n_seg != 1 || rx_seg[0].offset || rx_seg[0].length;
const int mprq_en = !mlx5_mprq_prepare(dev, idx, desc, rx_seg_en,
non_scatter_min_mbuf_size,
&mprq_log_actual_stride_num,
&mprq_log_actual_stride_size,
is_extmem);
/*
* Always allocate extra slots, even if eventually
* the vector Rx will not be used.
*/
uint16_t desc_n = desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
size_t alloc_size = sizeof(*tmpl) + desc_n * sizeof(struct rte_mbuf *);
const struct rte_eth_rxseg_split *qs_seg = rx_seg;
unsigned int tail_len;
if (mprq_en) {
/* Trim the number of descs needed. */
desc >>= mprq_log_actual_stride_num;
alloc_size += desc * sizeof(struct mlx5_mprq_buf *);
}
tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, alloc_size, 0, socket);
if (!tmpl) {
rte_errno = ENOMEM;
return NULL;
}
LIST_INIT(&tmpl->owners);
MLX5_ASSERT(n_seg && n_seg <= MLX5_MAX_RXQ_NSEG);
/*
* Save the original segment configuration in the shared queue
* descriptor for the later check on the sibling queue creation.
*/
tmpl->rxseg_n = n_seg;
rte_memcpy(tmpl->rxseg, qs_seg,
sizeof(struct rte_eth_rxseg_split) * n_seg);
/*
* Build the array of actual buffer offsets and lengths.
* Pad with the buffers from the last memory pool if
* needed to handle max size packets, replace zero length
* with the buffer length from the pool.
*/
ethdev: fix max Rx packet length There is a confusion on setting max Rx packet length, this patch aims to clarify it. 'rte_eth_dev_configure()' API accepts max Rx packet size via 'uint32_t max_rx_pkt_len' field of the config struct 'struct rte_eth_conf'. Also 'rte_eth_dev_set_mtu()' API can be used to set the MTU, and result stored into '(struct rte_eth_dev)->data->mtu'. These two APIs are related but they work in a disconnected way, they store the set values in different variables which makes hard to figure out which one to use, also having two different method for a related functionality is confusing for the users. Other issues causing confusion is: * maximum transmission unit (MTU) is payload of the Ethernet frame. And 'max_rx_pkt_len' is the size of the Ethernet frame. Difference is Ethernet frame overhead, and this overhead may be different from device to device based on what device supports, like VLAN and QinQ. * 'max_rx_pkt_len' is only valid when application requested jumbo frame, which adds additional confusion and some APIs and PMDs already discards this documented behavior. * For the jumbo frame enabled case, 'max_rx_pkt_len' is an mandatory field, this adds configuration complexity for application. As solution, both APIs gets MTU as parameter, and both saves the result in same variable '(struct rte_eth_dev)->data->mtu'. For this 'max_rx_pkt_len' updated as 'mtu', and it is always valid independent from jumbo frame. For 'rte_eth_dev_configure()', 'dev->data->dev_conf.rxmode.mtu' is user request and it should be used only within configure function and result should be stored to '(struct rte_eth_dev)->data->mtu'. After that point both application and PMD uses MTU from this variable. When application doesn't provide an MTU during 'rte_eth_dev_configure()' default 'RTE_ETHER_MTU' value is used. Additional clarification done on scattered Rx configuration, in relation to MTU and Rx buffer size. MTU is used to configure the device for physical Rx/Tx size limitation, Rx buffer is where to store Rx packets, many PMDs use mbuf data buffer size as Rx buffer size. PMDs compare MTU against Rx buffer size to decide enabling scattered Rx or not. If scattered Rx is not supported by device, MTU bigger than Rx buffer size should fail. Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com> Acked-by: Ajit Khaparde <ajit.khaparde@broadcom.com> Acked-by: Somnath Kotur <somnath.kotur@broadcom.com> Acked-by: Huisong Li <lihuisong@huawei.com> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com> Acked-by: Rosen Xu <rosen.xu@intel.com> Acked-by: Hyong Youb Kim <hyonkim@cisco.com>
2021-10-18 13:48:48 +00:00
tail_len = max_rx_pktlen;
do {
struct mlx5_eth_rxseg *hw_seg =
&tmpl->rxq.rxseg[tmpl->rxq.rxseg_n];
uint32_t buf_len, offset, seg_len;
/*
* For the buffers beyond descriptions offset is zero,
* the first buffer contains head room.
*/
buf_len = rte_pktmbuf_data_room_size(qs_seg->mp);
offset = (tmpl->rxq.rxseg_n >= n_seg ? 0 : qs_seg->offset) +
(tmpl->rxq.rxseg_n ? 0 : RTE_PKTMBUF_HEADROOM);
/*
* For the buffers beyond descriptions the length is
* pool buffer length, zero lengths are replaced with
* pool buffer length either.
*/
seg_len = tmpl->rxq.rxseg_n >= n_seg ? buf_len :
qs_seg->length ?
qs_seg->length :
(buf_len - offset);
/* Check is done in long int, now overflows. */
if (buf_len < seg_len + offset) {
DRV_LOG(ERR, "port %u Rx queue %u: Split offset/length "
"%u/%u can't be satisfied",
dev->data->port_id, idx,
qs_seg->length, qs_seg->offset);
rte_errno = EINVAL;
goto error;
}
if (seg_len > tail_len)
seg_len = buf_len - offset;
if (++tmpl->rxq.rxseg_n > MLX5_MAX_RXQ_NSEG) {
DRV_LOG(ERR,
"port %u too many SGEs (%u) needed to handle"
" requested maximum packet size %u, the maximum"
" supported are %u", dev->data->port_id,
ethdev: fix max Rx packet length There is a confusion on setting max Rx packet length, this patch aims to clarify it. 'rte_eth_dev_configure()' API accepts max Rx packet size via 'uint32_t max_rx_pkt_len' field of the config struct 'struct rte_eth_conf'. Also 'rte_eth_dev_set_mtu()' API can be used to set the MTU, and result stored into '(struct rte_eth_dev)->data->mtu'. These two APIs are related but they work in a disconnected way, they store the set values in different variables which makes hard to figure out which one to use, also having two different method for a related functionality is confusing for the users. Other issues causing confusion is: * maximum transmission unit (MTU) is payload of the Ethernet frame. And 'max_rx_pkt_len' is the size of the Ethernet frame. Difference is Ethernet frame overhead, and this overhead may be different from device to device based on what device supports, like VLAN and QinQ. * 'max_rx_pkt_len' is only valid when application requested jumbo frame, which adds additional confusion and some APIs and PMDs already discards this documented behavior. * For the jumbo frame enabled case, 'max_rx_pkt_len' is an mandatory field, this adds configuration complexity for application. As solution, both APIs gets MTU as parameter, and both saves the result in same variable '(struct rte_eth_dev)->data->mtu'. For this 'max_rx_pkt_len' updated as 'mtu', and it is always valid independent from jumbo frame. For 'rte_eth_dev_configure()', 'dev->data->dev_conf.rxmode.mtu' is user request and it should be used only within configure function and result should be stored to '(struct rte_eth_dev)->data->mtu'. After that point both application and PMD uses MTU from this variable. When application doesn't provide an MTU during 'rte_eth_dev_configure()' default 'RTE_ETHER_MTU' value is used. Additional clarification done on scattered Rx configuration, in relation to MTU and Rx buffer size. MTU is used to configure the device for physical Rx/Tx size limitation, Rx buffer is where to store Rx packets, many PMDs use mbuf data buffer size as Rx buffer size. PMDs compare MTU against Rx buffer size to decide enabling scattered Rx or not. If scattered Rx is not supported by device, MTU bigger than Rx buffer size should fail. Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com> Acked-by: Ajit Khaparde <ajit.khaparde@broadcom.com> Acked-by: Somnath Kotur <somnath.kotur@broadcom.com> Acked-by: Huisong Li <lihuisong@huawei.com> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com> Acked-by: Rosen Xu <rosen.xu@intel.com> Acked-by: Hyong Youb Kim <hyonkim@cisco.com>
2021-10-18 13:48:48 +00:00
tmpl->rxq.rxseg_n, max_rx_pktlen,
MLX5_MAX_RXQ_NSEG);
rte_errno = ENOTSUP;
goto error;
}
/* Build the actual scattering element in the queue object. */
hw_seg->mp = qs_seg->mp;
MLX5_ASSERT(offset <= UINT16_MAX);
MLX5_ASSERT(seg_len <= UINT16_MAX);
hw_seg->offset = (uint16_t)offset;
hw_seg->length = (uint16_t)seg_len;
/*
* Advance the segment descriptor, the padding is the based
* on the attributes of the last descriptor.
*/
if (tmpl->rxq.rxseg_n < n_seg)
qs_seg++;
tail_len -= RTE_MIN(tail_len, seg_len);
} while (tail_len || !rte_is_power_of_2(tmpl->rxq.rxseg_n));
MLX5_ASSERT(tmpl->rxq.rxseg_n &&
tmpl->rxq.rxseg_n <= MLX5_MAX_RXQ_NSEG);
if (tmpl->rxq.rxseg_n > 1 && !(offloads & RTE_ETH_RX_OFFLOAD_SCATTER)) {
DRV_LOG(ERR, "port %u Rx queue %u: Scatter offload is not"
" configured and no enough mbuf space(%u) to contain "
"the maximum RX packet length(%u) with head-room(%u)",
ethdev: fix max Rx packet length There is a confusion on setting max Rx packet length, this patch aims to clarify it. 'rte_eth_dev_configure()' API accepts max Rx packet size via 'uint32_t max_rx_pkt_len' field of the config struct 'struct rte_eth_conf'. Also 'rte_eth_dev_set_mtu()' API can be used to set the MTU, and result stored into '(struct rte_eth_dev)->data->mtu'. These two APIs are related but they work in a disconnected way, they store the set values in different variables which makes hard to figure out which one to use, also having two different method for a related functionality is confusing for the users. Other issues causing confusion is: * maximum transmission unit (MTU) is payload of the Ethernet frame. And 'max_rx_pkt_len' is the size of the Ethernet frame. Difference is Ethernet frame overhead, and this overhead may be different from device to device based on what device supports, like VLAN and QinQ. * 'max_rx_pkt_len' is only valid when application requested jumbo frame, which adds additional confusion and some APIs and PMDs already discards this documented behavior. * For the jumbo frame enabled case, 'max_rx_pkt_len' is an mandatory field, this adds configuration complexity for application. As solution, both APIs gets MTU as parameter, and both saves the result in same variable '(struct rte_eth_dev)->data->mtu'. For this 'max_rx_pkt_len' updated as 'mtu', and it is always valid independent from jumbo frame. For 'rte_eth_dev_configure()', 'dev->data->dev_conf.rxmode.mtu' is user request and it should be used only within configure function and result should be stored to '(struct rte_eth_dev)->data->mtu'. After that point both application and PMD uses MTU from this variable. When application doesn't provide an MTU during 'rte_eth_dev_configure()' default 'RTE_ETHER_MTU' value is used. Additional clarification done on scattered Rx configuration, in relation to MTU and Rx buffer size. MTU is used to configure the device for physical Rx/Tx size limitation, Rx buffer is where to store Rx packets, many PMDs use mbuf data buffer size as Rx buffer size. PMDs compare MTU against Rx buffer size to decide enabling scattered Rx or not. If scattered Rx is not supported by device, MTU bigger than Rx buffer size should fail. Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com> Acked-by: Ajit Khaparde <ajit.khaparde@broadcom.com> Acked-by: Somnath Kotur <somnath.kotur@broadcom.com> Acked-by: Huisong Li <lihuisong@huawei.com> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com> Acked-by: Rosen Xu <rosen.xu@intel.com> Acked-by: Hyong Youb Kim <hyonkim@cisco.com>
2021-10-18 13:48:48 +00:00
dev->data->port_id, idx, mb_len, max_rx_pktlen,
RTE_PKTMBUF_HEADROOM);
rte_errno = ENOSPC;
goto error;
}
tmpl->is_hairpin = false;
if (mlx5_mr_ctrl_init(&tmpl->rxq.mr_ctrl,
&priv->sh->cdev->mr_scache.dev_gen, socket)) {
net/mlx5: add new memory region support This is the new design of Memory Region (MR) for mlx PMD, in order to: - Accommodate the new memory hotplug model. - Support non-contiguous Mempool. There are multiple layers for MR search. L0 is to look up the last-hit entry which is pointed by mr_ctrl->mru (Most Recently Used). If L0 misses, L1 is to look up the address in a fixed-sized array by linear search. L0/L1 is in an inline function - mlx5_mr_lookup_cache(). If L1 misses, the bottom-half function is called to look up the address from the bigger local cache of the queue. This is L2 - mlx5_mr_addr2mr_bh() and it is not an inline function. Data structure for L2 is the Binary Tree. If L2 misses, the search falls into the slowest path which takes locks in order to access global device cache (priv->mr.cache) which is also a B-tree and caches the original MR list (priv->mr.mr_list) of the device. Unless the global cache is overflowed, it is all-inclusive of the MR list. This is L3 - mlx5_mr_lookup_dev(). The size of the L3 cache table is limited and can't be expanded on the fly due to deadlock. Refer to the comments in the code for the details - mr_lookup_dev(). If L3 is overflowed, the list will have to be searched directly bypassing the cache although it is slower. If L3 misses, a new MR for the address should be created - mlx5_mr_create(). When it creates a new MR, it tries to register adjacent memsegs as much as possible which are virtually contiguous around the address. This must take two locks - memory_hotplug_lock and priv->mr.rwlock. Due to memory_hotplug_lock, there can't be any allocation/free of memory inside. In the free callback of the memory hotplug event, freed space is searched from the MR list and corresponding bits are cleared from the bitmap of MRs. This can fragment a MR and the MR will have multiple search entries in the caches. Once there's a change by the event, the global cache must be rebuilt and all the per-queue caches will be flushed as well. If memory is frequently freed in run-time, that may cause jitter on dataplane processing in the worst case by incurring MR cache flush and rebuild. But, it would be the least probable scenario. To guarantee the most optimal performance, it is highly recommended to use an EAL option - '--socket-mem'. Then, the reserved memory will be pinned and won't be freed dynamically. And it is also recommended to configure per-lcore cache of Mempool. Even though there're many MRs for a device or MRs are highly fragmented, the cache of Mempool will be much helpful to reduce misses on per-queue caches anyway. '--legacy-mem' is also supported. Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
2018-05-09 11:09:04 +00:00
/* rte_errno is already set. */
goto error;
}
tmpl->socket = socket;
if (dev->data->dev_conf.intr_conf.rxq)
tmpl->irq = 1;
if (mprq_en) {
/* TODO: Rx scatter isn't supported yet. */
tmpl->rxq.sges_n = 0;
tmpl->rxq.log_strd_num = mprq_log_actual_stride_num;
tmpl->rxq.log_strd_sz = mprq_log_actual_stride_size;
tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT;
tmpl->rxq.strd_scatter_en =
!!(offloads & RTE_ETH_RX_OFFLOAD_SCATTER);
tmpl->rxq.mprq_max_memcpy_len = RTE_MIN(first_mb_free_size,
config->mprq.max_memcpy_len);
ethdev: fix max Rx packet length There is a confusion on setting max Rx packet length, this patch aims to clarify it. 'rte_eth_dev_configure()' API accepts max Rx packet size via 'uint32_t max_rx_pkt_len' field of the config struct 'struct rte_eth_conf'. Also 'rte_eth_dev_set_mtu()' API can be used to set the MTU, and result stored into '(struct rte_eth_dev)->data->mtu'. These two APIs are related but they work in a disconnected way, they store the set values in different variables which makes hard to figure out which one to use, also having two different method for a related functionality is confusing for the users. Other issues causing confusion is: * maximum transmission unit (MTU) is payload of the Ethernet frame. And 'max_rx_pkt_len' is the size of the Ethernet frame. Difference is Ethernet frame overhead, and this overhead may be different from device to device based on what device supports, like VLAN and QinQ. * 'max_rx_pkt_len' is only valid when application requested jumbo frame, which adds additional confusion and some APIs and PMDs already discards this documented behavior. * For the jumbo frame enabled case, 'max_rx_pkt_len' is an mandatory field, this adds configuration complexity for application. As solution, both APIs gets MTU as parameter, and both saves the result in same variable '(struct rte_eth_dev)->data->mtu'. For this 'max_rx_pkt_len' updated as 'mtu', and it is always valid independent from jumbo frame. For 'rte_eth_dev_configure()', 'dev->data->dev_conf.rxmode.mtu' is user request and it should be used only within configure function and result should be stored to '(struct rte_eth_dev)->data->mtu'. After that point both application and PMD uses MTU from this variable. When application doesn't provide an MTU during 'rte_eth_dev_configure()' default 'RTE_ETHER_MTU' value is used. Additional clarification done on scattered Rx configuration, in relation to MTU and Rx buffer size. MTU is used to configure the device for physical Rx/Tx size limitation, Rx buffer is where to store Rx packets, many PMDs use mbuf data buffer size as Rx buffer size. PMDs compare MTU against Rx buffer size to decide enabling scattered Rx or not. If scattered Rx is not supported by device, MTU bigger than Rx buffer size should fail. Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com> Acked-by: Ajit Khaparde <ajit.khaparde@broadcom.com> Acked-by: Somnath Kotur <somnath.kotur@broadcom.com> Acked-by: Huisong Li <lihuisong@huawei.com> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com> Acked-by: Rosen Xu <rosen.xu@intel.com> Acked-by: Hyong Youb Kim <hyonkim@cisco.com>
2021-10-18 13:48:48 +00:00
max_lro_size = RTE_MIN(max_rx_pktlen,
RTE_BIT32(tmpl->rxq.log_strd_num) *
RTE_BIT32(tmpl->rxq.log_strd_sz));
} else if (tmpl->rxq.rxseg_n == 1) {
ethdev: fix max Rx packet length There is a confusion on setting max Rx packet length, this patch aims to clarify it. 'rte_eth_dev_configure()' API accepts max Rx packet size via 'uint32_t max_rx_pkt_len' field of the config struct 'struct rte_eth_conf'. Also 'rte_eth_dev_set_mtu()' API can be used to set the MTU, and result stored into '(struct rte_eth_dev)->data->mtu'. These two APIs are related but they work in a disconnected way, they store the set values in different variables which makes hard to figure out which one to use, also having two different method for a related functionality is confusing for the users. Other issues causing confusion is: * maximum transmission unit (MTU) is payload of the Ethernet frame. And 'max_rx_pkt_len' is the size of the Ethernet frame. Difference is Ethernet frame overhead, and this overhead may be different from device to device based on what device supports, like VLAN and QinQ. * 'max_rx_pkt_len' is only valid when application requested jumbo frame, which adds additional confusion and some APIs and PMDs already discards this documented behavior. * For the jumbo frame enabled case, 'max_rx_pkt_len' is an mandatory field, this adds configuration complexity for application. As solution, both APIs gets MTU as parameter, and both saves the result in same variable '(struct rte_eth_dev)->data->mtu'. For this 'max_rx_pkt_len' updated as 'mtu', and it is always valid independent from jumbo frame. For 'rte_eth_dev_configure()', 'dev->data->dev_conf.rxmode.mtu' is user request and it should be used only within configure function and result should be stored to '(struct rte_eth_dev)->data->mtu'. After that point both application and PMD uses MTU from this variable. When application doesn't provide an MTU during 'rte_eth_dev_configure()' default 'RTE_ETHER_MTU' value is used. Additional clarification done on scattered Rx configuration, in relation to MTU and Rx buffer size. MTU is used to configure the device for physical Rx/Tx size limitation, Rx buffer is where to store Rx packets, many PMDs use mbuf data buffer size as Rx buffer size. PMDs compare MTU against Rx buffer size to decide enabling scattered Rx or not. If scattered Rx is not supported by device, MTU bigger than Rx buffer size should fail. Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com> Acked-by: Ajit Khaparde <ajit.khaparde@broadcom.com> Acked-by: Somnath Kotur <somnath.kotur@broadcom.com> Acked-by: Huisong Li <lihuisong@huawei.com> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com> Acked-by: Rosen Xu <rosen.xu@intel.com> Acked-by: Hyong Youb Kim <hyonkim@cisco.com>
2021-10-18 13:48:48 +00:00
MLX5_ASSERT(max_rx_pktlen <= first_mb_free_size);
tmpl->rxq.sges_n = 0;
ethdev: fix max Rx packet length There is a confusion on setting max Rx packet length, this patch aims to clarify it. 'rte_eth_dev_configure()' API accepts max Rx packet size via 'uint32_t max_rx_pkt_len' field of the config struct 'struct rte_eth_conf'. Also 'rte_eth_dev_set_mtu()' API can be used to set the MTU, and result stored into '(struct rte_eth_dev)->data->mtu'. These two APIs are related but they work in a disconnected way, they store the set values in different variables which makes hard to figure out which one to use, also having two different method for a related functionality is confusing for the users. Other issues causing confusion is: * maximum transmission unit (MTU) is payload of the Ethernet frame. And 'max_rx_pkt_len' is the size of the Ethernet frame. Difference is Ethernet frame overhead, and this overhead may be different from device to device based on what device supports, like VLAN and QinQ. * 'max_rx_pkt_len' is only valid when application requested jumbo frame, which adds additional confusion and some APIs and PMDs already discards this documented behavior. * For the jumbo frame enabled case, 'max_rx_pkt_len' is an mandatory field, this adds configuration complexity for application. As solution, both APIs gets MTU as parameter, and both saves the result in same variable '(struct rte_eth_dev)->data->mtu'. For this 'max_rx_pkt_len' updated as 'mtu', and it is always valid independent from jumbo frame. For 'rte_eth_dev_configure()', 'dev->data->dev_conf.rxmode.mtu' is user request and it should be used only within configure function and result should be stored to '(struct rte_eth_dev)->data->mtu'. After that point both application and PMD uses MTU from this variable. When application doesn't provide an MTU during 'rte_eth_dev_configure()' default 'RTE_ETHER_MTU' value is used. Additional clarification done on scattered Rx configuration, in relation to MTU and Rx buffer size. MTU is used to configure the device for physical Rx/Tx size limitation, Rx buffer is where to store Rx packets, many PMDs use mbuf data buffer size as Rx buffer size. PMDs compare MTU against Rx buffer size to decide enabling scattered Rx or not. If scattered Rx is not supported by device, MTU bigger than Rx buffer size should fail. Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com> Acked-by: Ajit Khaparde <ajit.khaparde@broadcom.com> Acked-by: Somnath Kotur <somnath.kotur@broadcom.com> Acked-by: Huisong Li <lihuisong@huawei.com> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com> Acked-by: Rosen Xu <rosen.xu@intel.com> Acked-by: Hyong Youb Kim <hyonkim@cisco.com>
2021-10-18 13:48:48 +00:00
max_lro_size = max_rx_pktlen;
} else if (offloads & RTE_ETH_RX_OFFLOAD_SCATTER) {
unsigned int sges_n;
if (lro_on_queue && first_mb_free_size <
MLX5_MAX_LRO_HEADER_FIX) {
DRV_LOG(ERR, "Not enough space in the first segment(%u)"
" to include the max header size(%u) for LRO",
first_mb_free_size, MLX5_MAX_LRO_HEADER_FIX);
rte_errno = ENOTSUP;
goto error;
}
/*
* Determine the number of SGEs needed for a full packet
* and round it to the next power of two.
*/
sges_n = log2above(tmpl->rxq.rxseg_n);
if (sges_n > MLX5_MAX_LOG_RQ_SEGS) {
DRV_LOG(ERR,
"port %u too many SGEs (%u) needed to handle"
" requested maximum packet size %u, the maximum"
" supported are %u", dev->data->port_id,
ethdev: fix max Rx packet length There is a confusion on setting max Rx packet length, this patch aims to clarify it. 'rte_eth_dev_configure()' API accepts max Rx packet size via 'uint32_t max_rx_pkt_len' field of the config struct 'struct rte_eth_conf'. Also 'rte_eth_dev_set_mtu()' API can be used to set the MTU, and result stored into '(struct rte_eth_dev)->data->mtu'. These two APIs are related but they work in a disconnected way, they store the set values in different variables which makes hard to figure out which one to use, also having two different method for a related functionality is confusing for the users. Other issues causing confusion is: * maximum transmission unit (MTU) is payload of the Ethernet frame. And 'max_rx_pkt_len' is the size of the Ethernet frame. Difference is Ethernet frame overhead, and this overhead may be different from device to device based on what device supports, like VLAN and QinQ. * 'max_rx_pkt_len' is only valid when application requested jumbo frame, which adds additional confusion and some APIs and PMDs already discards this documented behavior. * For the jumbo frame enabled case, 'max_rx_pkt_len' is an mandatory field, this adds configuration complexity for application. As solution, both APIs gets MTU as parameter, and both saves the result in same variable '(struct rte_eth_dev)->data->mtu'. For this 'max_rx_pkt_len' updated as 'mtu', and it is always valid independent from jumbo frame. For 'rte_eth_dev_configure()', 'dev->data->dev_conf.rxmode.mtu' is user request and it should be used only within configure function and result should be stored to '(struct rte_eth_dev)->data->mtu'. After that point both application and PMD uses MTU from this variable. When application doesn't provide an MTU during 'rte_eth_dev_configure()' default 'RTE_ETHER_MTU' value is used. Additional clarification done on scattered Rx configuration, in relation to MTU and Rx buffer size. MTU is used to configure the device for physical Rx/Tx size limitation, Rx buffer is where to store Rx packets, many PMDs use mbuf data buffer size as Rx buffer size. PMDs compare MTU against Rx buffer size to decide enabling scattered Rx or not. If scattered Rx is not supported by device, MTU bigger than Rx buffer size should fail. Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com> Acked-by: Ajit Khaparde <ajit.khaparde@broadcom.com> Acked-by: Somnath Kotur <somnath.kotur@broadcom.com> Acked-by: Huisong Li <lihuisong@huawei.com> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com> Acked-by: Rosen Xu <rosen.xu@intel.com> Acked-by: Hyong Youb Kim <hyonkim@cisco.com>
2021-10-18 13:48:48 +00:00
1 << sges_n, max_rx_pktlen,
1u << MLX5_MAX_LOG_RQ_SEGS);
rte_errno = ENOTSUP;
goto error;
}
tmpl->rxq.sges_n = sges_n;
ethdev: fix max Rx packet length There is a confusion on setting max Rx packet length, this patch aims to clarify it. 'rte_eth_dev_configure()' API accepts max Rx packet size via 'uint32_t max_rx_pkt_len' field of the config struct 'struct rte_eth_conf'. Also 'rte_eth_dev_set_mtu()' API can be used to set the MTU, and result stored into '(struct rte_eth_dev)->data->mtu'. These two APIs are related but they work in a disconnected way, they store the set values in different variables which makes hard to figure out which one to use, also having two different method for a related functionality is confusing for the users. Other issues causing confusion is: * maximum transmission unit (MTU) is payload of the Ethernet frame. And 'max_rx_pkt_len' is the size of the Ethernet frame. Difference is Ethernet frame overhead, and this overhead may be different from device to device based on what device supports, like VLAN and QinQ. * 'max_rx_pkt_len' is only valid when application requested jumbo frame, which adds additional confusion and some APIs and PMDs already discards this documented behavior. * For the jumbo frame enabled case, 'max_rx_pkt_len' is an mandatory field, this adds configuration complexity for application. As solution, both APIs gets MTU as parameter, and both saves the result in same variable '(struct rte_eth_dev)->data->mtu'. For this 'max_rx_pkt_len' updated as 'mtu', and it is always valid independent from jumbo frame. For 'rte_eth_dev_configure()', 'dev->data->dev_conf.rxmode.mtu' is user request and it should be used only within configure function and result should be stored to '(struct rte_eth_dev)->data->mtu'. After that point both application and PMD uses MTU from this variable. When application doesn't provide an MTU during 'rte_eth_dev_configure()' default 'RTE_ETHER_MTU' value is used. Additional clarification done on scattered Rx configuration, in relation to MTU and Rx buffer size. MTU is used to configure the device for physical Rx/Tx size limitation, Rx buffer is where to store Rx packets, many PMDs use mbuf data buffer size as Rx buffer size. PMDs compare MTU against Rx buffer size to decide enabling scattered Rx or not. If scattered Rx is not supported by device, MTU bigger than Rx buffer size should fail. Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com> Acked-by: Ajit Khaparde <ajit.khaparde@broadcom.com> Acked-by: Somnath Kotur <somnath.kotur@broadcom.com> Acked-by: Huisong Li <lihuisong@huawei.com> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com> Acked-by: Rosen Xu <rosen.xu@intel.com> Acked-by: Hyong Youb Kim <hyonkim@cisco.com>
2021-10-18 13:48:48 +00:00
max_lro_size = max_rx_pktlen;
}
DRV_LOG(DEBUG, "port %u maximum number of segments per packet: %u",
dev->data->port_id, 1 << tmpl->rxq.sges_n);
if (desc % (1 << tmpl->rxq.sges_n)) {
DRV_LOG(ERR,
"port %u number of Rx queue descriptors (%u) is not a"
" multiple of SGEs per packet (%u)",
dev->data->port_id,
desc,
1 << tmpl->rxq.sges_n);
rte_errno = EINVAL;
goto error;
}
mlx5_max_lro_msg_size_adjust(dev, idx, max_lro_size);
/* Toggle RX checksum offload if hardware supports it. */
tmpl->rxq.csum = !!(offloads & RTE_ETH_RX_OFFLOAD_CHECKSUM);
/* Configure Rx timestamp. */
tmpl->rxq.hw_timestamp = !!(offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP);
tmpl->rxq.timestamp_rx_flag = 0;
if (tmpl->rxq.hw_timestamp && rte_mbuf_dyn_rx_timestamp_register(
&tmpl->rxq.timestamp_offset,
&tmpl->rxq.timestamp_rx_flag) != 0) {
DRV_LOG(ERR, "Cannot register Rx timestamp field/flag");
goto error;
}
/* Configure VLAN stripping. */
tmpl->rxq.vlan_strip = !!(offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
/* By default, FCS (CRC) is stripped by hardware. */
tmpl->rxq.crc_present = 0;
tmpl->rxq.lro = lro_on_queue;
if (offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC) {
if (priv->sh->config.hw_fcs_strip) {
/*
* RQs used for LRO-enabled TIRs should not be
* configured to scatter the FCS.
*/
if (lro_on_queue)
DRV_LOG(WARNING,
"port %u CRC stripping has been "
"disabled but will still be performed "
"by hardware, because LRO is enabled",
dev->data->port_id);
else
tmpl->rxq.crc_present = 1;
} else {
DRV_LOG(WARNING,
"port %u CRC stripping has been disabled but will"
" still be performed by hardware, make sure MLNX_OFED"
" and firmware are up to date",
dev->data->port_id);
}
}
DRV_LOG(DEBUG,
"port %u CRC stripping is %s, %u bytes will be subtracted from"
" incoming frames to hide it",
dev->data->port_id,
tmpl->rxq.crc_present ? "disabled" : "enabled",
tmpl->rxq.crc_present << 2);
tmpl->rxq.rss_hash = !!priv->rss_conf.rss_hf &&
(!!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS));
/* Save port ID. */
tmpl->rxq.port_id = dev->data->port_id;
tmpl->sh = priv->sh;
tmpl->rxq.mp = rx_seg[0].mp;
tmpl->rxq.elts_n = log2above(desc);
tmpl->rxq.rq_repl_thresh = MLX5_VPMD_RXQ_RPLNSH_THRESH(desc_n);
tmpl->rxq.elts = (struct rte_mbuf *(*)[desc_n])(tmpl + 1);
tmpl->rxq.mprq_bufs =
(struct mlx5_mprq_buf *(*)[desc])(*tmpl->rxq.elts + desc_n);
tmpl->rxq.idx = idx;
if (conf->share_group > 0) {
tmpl->rxq.shared = 1;
tmpl->share_group = conf->share_group;
tmpl->share_qid = conf->share_qid;
LIST_INSERT_HEAD(&priv->sh->shared_rxqs, tmpl, share_entry);
}
LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
return tmpl;
error:
mlx5_mr_btree_free(&tmpl->rxq.mr_ctrl.cache_bh);
mlx5_free(tmpl);
return NULL;
}
/**
* Create a DPDK Rx hairpin queue.
*
* @param dev
* Pointer to Ethernet device.
* @param rxq
* RX queue.
* @param desc
* Number of descriptors to configure in queue.
* @param hairpin_conf
* The hairpin binding configuration.
*
* @return
* A DPDK queue object on success, NULL otherwise and rte_errno is set.
*/
struct mlx5_rxq_ctrl *
mlx5_rxq_hairpin_new(struct rte_eth_dev *dev, struct mlx5_rxq_priv *rxq,
uint16_t desc,
const struct rte_eth_hairpin_conf *hairpin_conf)
{
uint16_t idx = rxq->idx;
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_rxq_ctrl *tmpl;
tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl), 0,
SOCKET_ID_ANY);
if (!tmpl) {
rte_errno = ENOMEM;
return NULL;
}
LIST_INIT(&tmpl->owners);
rxq->ctrl = tmpl;
LIST_INSERT_HEAD(&tmpl->owners, rxq, owner_entry);
tmpl->is_hairpin = true;
tmpl->socket = SOCKET_ID_ANY;
tmpl->rxq.rss_hash = 0;
tmpl->rxq.port_id = dev->data->port_id;
tmpl->sh = priv->sh;
tmpl->rxq.mp = NULL;
tmpl->rxq.elts_n = log2above(desc);
tmpl->rxq.elts = NULL;
tmpl->rxq.mr_ctrl.cache_bh = (struct mlx5_mr_btree) { 0 };
tmpl->rxq.idx = idx;
rxq->hairpin_conf = *hairpin_conf;
mlx5_rxq_ref(dev, idx);
LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
return tmpl;
}
/**
* Increase Rx queue reference count.
*
* @param dev
* Pointer to Ethernet device.
* @param idx
* RX queue index.
*
* @return
* A pointer to the queue if it exists, NULL otherwise.
*/
struct mlx5_rxq_priv *
mlx5_rxq_ref(struct rte_eth_dev *dev, uint16_t idx)
{
struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx);
if (rxq != NULL)
__atomic_fetch_add(&rxq->refcnt, 1, __ATOMIC_RELAXED);
return rxq;
}
/**
* Dereference a Rx queue.
*
* @param dev
* Pointer to Ethernet device.
* @param idx
* RX queue index.
*
* @return
* Updated reference count.
*/
uint32_t
mlx5_rxq_deref(struct rte_eth_dev *dev, uint16_t idx)
{
struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx);
if (rxq == NULL)
return 0;
return __atomic_sub_fetch(&rxq->refcnt, 1, __ATOMIC_RELAXED);
}
/**
* Get a Rx queue.
*
* @param dev
* Pointer to Ethernet device.
* @param idx
* RX queue index.
*
* @return
* A pointer to the queue if it exists, NULL otherwise.
*/
struct mlx5_rxq_priv *
mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx)
{
struct mlx5_priv *priv = dev->data->dev_private;
if (idx >= priv->rxqs_n)
return NULL;
MLX5_ASSERT(priv->rxq_privs != NULL);
return (*priv->rxq_privs)[idx];
}
/**
* Get Rx queue shareable control.
*
* @param dev
* Pointer to Ethernet device.
* @param idx
* RX queue index.
*
* @return
* A pointer to the queue control if it exists, NULL otherwise.
*/
struct mlx5_rxq_ctrl *
mlx5_rxq_ctrl_get(struct rte_eth_dev *dev, uint16_t idx)
{
struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx);
return rxq == NULL ? NULL : rxq->ctrl;
}
/**
* Get Rx queue shareable data.
*
* @param dev
* Pointer to Ethernet device.
* @param idx
* RX queue index.
*
* @return
* A pointer to the queue data if it exists, NULL otherwise.
*/
struct mlx5_rxq_data *
mlx5_rxq_data_get(struct rte_eth_dev *dev, uint16_t idx)
{
struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx);
return rxq == NULL ? NULL : &rxq->ctrl->rxq;
}
/**
* Increase an external Rx queue reference count.
*
* @param dev
* Pointer to Ethernet device.
* @param idx
* External RX queue index.
*
* @return
* A pointer to the queue if it exists, NULL otherwise.
*/
struct mlx5_external_rxq *
mlx5_ext_rxq_ref(struct rte_eth_dev *dev, uint16_t idx)
{
struct mlx5_external_rxq *rxq = mlx5_ext_rxq_get(dev, idx);
__atomic_fetch_add(&rxq->refcnt, 1, __ATOMIC_RELAXED);
return rxq;
}
/**
* Decrease an external Rx queue reference count.
*
* @param dev
* Pointer to Ethernet device.
* @param idx
* External RX queue index.
*
* @return
* Updated reference count.
*/
uint32_t
mlx5_ext_rxq_deref(struct rte_eth_dev *dev, uint16_t idx)
{
struct mlx5_external_rxq *rxq = mlx5_ext_rxq_get(dev, idx);
return __atomic_sub_fetch(&rxq->refcnt, 1, __ATOMIC_RELAXED);
}
/**
* Get an external Rx queue.
*
* @param dev
* Pointer to Ethernet device.
* @param idx
* External Rx queue index.
*
* @return
* A pointer to the queue if it exists, NULL otherwise.
*/
struct mlx5_external_rxq *
mlx5_ext_rxq_get(struct rte_eth_dev *dev, uint16_t idx)
{
struct mlx5_priv *priv = dev->data->dev_private;
MLX5_ASSERT(mlx5_is_external_rxq(dev, idx));
return &priv->ext_rxqs[idx - MLX5_EXTERNAL_RX_QUEUE_ID_MIN];
}
/**
* Dereference a list of Rx queues.
*
* @param dev
* Pointer to Ethernet device.
* @param queues
* List of Rx queues to deref.
* @param queues_n
* Number of queues in the array.
*/
static void
mlx5_rxqs_deref(struct rte_eth_dev *dev, uint16_t *queues,
const uint32_t queues_n)
{
uint32_t i;
for (i = 0; i < queues_n; i++) {
if (mlx5_is_external_rxq(dev, queues[i]))
claim_nonzero(mlx5_ext_rxq_deref(dev, queues[i]));
else
claim_nonzero(mlx5_rxq_deref(dev, queues[i]));
}
}
/**
* Increase reference count for list of Rx queues.
*
* @param dev
* Pointer to Ethernet device.
* @param queues
* List of Rx queues to ref.
* @param queues_n
* Number of queues in the array.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx5_rxqs_ref(struct rte_eth_dev *dev, uint16_t *queues,
const uint32_t queues_n)
{
uint32_t i;
for (i = 0; i != queues_n; ++i) {
if (mlx5_is_external_rxq(dev, queues[i])) {
if (mlx5_ext_rxq_ref(dev, queues[i]) == NULL)
goto error;
} else {
if (mlx5_rxq_ref(dev, queues[i]) == NULL)
goto error;
}
}
return 0;
error:
mlx5_rxqs_deref(dev, queues, i);
rte_errno = EINVAL;
return -rte_errno;
}
/**
* Release a Rx queue.
*
* @param dev
* Pointer to Ethernet device.
* @param idx
* RX queue index.
*
* @return
* 1 while a reference on it exists, 0 when freed.
*/
int
mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_rxq_priv *rxq;
struct mlx5_rxq_ctrl *rxq_ctrl;
uint32_t refcnt;
if (priv->rxq_privs == NULL)
return 0;
rxq = mlx5_rxq_get(dev, idx);
if (rxq == NULL || rxq->refcnt == 0)
return 0;
rxq_ctrl = rxq->ctrl;
refcnt = mlx5_rxq_deref(dev, idx);
if (refcnt > 1) {
return 1;
} else if (refcnt == 1) { /* RxQ stopped. */
priv->obj_ops.rxq_obj_release(rxq);
if (!rxq_ctrl->started && rxq_ctrl->obj != NULL) {
LIST_REMOVE(rxq_ctrl->obj, next);
mlx5_free(rxq_ctrl->obj);
rxq_ctrl->obj = NULL;
}
if (!rxq_ctrl->is_hairpin) {
if (!rxq_ctrl->started)
rxq_free_elts(rxq_ctrl);
dev->data->rx_queue_state[idx] =
RTE_ETH_QUEUE_STATE_STOPPED;
}
} else { /* Refcnt zero, closing device. */
LIST_REMOVE(rxq, owner_entry);
if (LIST_EMPTY(&rxq_ctrl->owners)) {
if (!rxq_ctrl->is_hairpin)
mlx5_mr_btree_free
(&rxq_ctrl->rxq.mr_ctrl.cache_bh);
if (rxq_ctrl->rxq.shared)
LIST_REMOVE(rxq_ctrl, share_entry);
LIST_REMOVE(rxq_ctrl, next);
mlx5_free(rxq_ctrl);
}
dev->data->rx_queues[idx] = NULL;
mlx5_free(rxq);
(*priv->rxq_privs)[idx] = NULL;
}
return 0;
}
/**
* Verify the Rx Queue list is empty
*
* @param dev
* Pointer to Ethernet device.
*
* @return
* The number of object not released.
*/
int
mlx5_rxq_verify(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_rxq_ctrl *rxq_ctrl;
int ret = 0;
LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced",
dev->data->port_id, rxq_ctrl->rxq.idx);
++ret;
}
return ret;
}
/**
* Verify the external Rx Queue list is empty.
*
* @param dev
* Pointer to Ethernet device.
*
* @return
* The number of object not released.
*/
int
mlx5_ext_rxq_verify(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_external_rxq *rxq;
uint32_t i;
int ret = 0;
if (priv->ext_rxqs == NULL)
return 0;
for (i = MLX5_EXTERNAL_RX_QUEUE_ID_MIN; i <= UINT16_MAX ; ++i) {
rxq = mlx5_ext_rxq_get(dev, i);
if (rxq->refcnt < 2)
continue;
DRV_LOG(DEBUG, "Port %u external RxQ %u still referenced.",
dev->data->port_id, i);
++ret;
}
return ret;
}
/**
* Check whether RxQ type is Hairpin.
*
* @param dev
* Pointer to Ethernet device.
* @param idx
* Rx queue index.
*
* @return
* True if Rx queue type is Hairpin, otherwise False.
*/
bool
mlx5_rxq_is_hairpin(struct rte_eth_dev *dev, uint16_t idx)
{
struct mlx5_rxq_ctrl *rxq_ctrl;
if (mlx5_is_external_rxq(dev, idx))
return false;
rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx);
return (rxq_ctrl != NULL && rxq_ctrl->is_hairpin);
}
/*
* Get a Rx hairpin queue configuration.
*
* @param dev
* Pointer to Ethernet device.
* @param idx
* Rx queue index.
*
* @return
* Pointer to the configuration if a hairpin RX queue, otherwise NULL.
*/
const struct rte_eth_hairpin_conf *
mlx5_rxq_get_hairpin_conf(struct rte_eth_dev *dev, uint16_t idx)
{
if (mlx5_rxq_is_hairpin(dev, idx)) {
struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx);
return rxq != NULL ? &rxq->hairpin_conf : NULL;
}
return NULL;
}
/**
* Match queues listed in arguments to queues contained in indirection table
* object.
*
* @param ind_tbl
* Pointer to indirection table to match.
* @param queues
* Queues to match to queues in indirection table.
* @param queues_n
* Number of queues in the array.
*
* @return
* 1 if all queues in indirection table match 0 otherwise.
*/
static int
mlx5_ind_table_obj_match_queues(const struct mlx5_ind_table_obj *ind_tbl,
const uint16_t *queues, uint32_t queues_n)
{
return (ind_tbl->queues_n == queues_n) &&
(!memcmp(ind_tbl->queues, queues,
ind_tbl->queues_n * sizeof(ind_tbl->queues[0])));
}
/**
* Get an indirection table.
*
* @param dev
* Pointer to Ethernet device.
* @param queues
* Queues entering in the indirection table.
* @param queues_n
* Number of queues in the array.
*
* @return
* An indirection table if found.
*/
struct mlx5_ind_table_obj *
mlx5_ind_table_obj_get(struct rte_eth_dev *dev, const uint16_t *queues,
uint32_t queues_n)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_ind_table_obj *ind_tbl;
rte_rwlock_read_lock(&priv->ind_tbls_lock);
LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
if ((ind_tbl->queues_n == queues_n) &&
(memcmp(ind_tbl->queues, queues,
ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
== 0)) {
__atomic_fetch_add(&ind_tbl->refcnt, 1,
__ATOMIC_RELAXED);
break;
}
}
rte_rwlock_read_unlock(&priv->ind_tbls_lock);
return ind_tbl;
}
/**
* Release an indirection table.
*
* @param dev
* Pointer to Ethernet device.
* @param ind_table
* Indirection table to release.
* @param deref_rxqs
* If true, then dereference RX queues related to indirection table.
* Otherwise, no additional action will be taken.
*
* @return
* 1 while a reference on it exists, 0 when freed.
*/
int
mlx5_ind_table_obj_release(struct rte_eth_dev *dev,
struct mlx5_ind_table_obj *ind_tbl,
bool deref_rxqs)
{
struct mlx5_priv *priv = dev->data->dev_private;
unsigned int ret;
rte_rwlock_write_lock(&priv->ind_tbls_lock);
ret = __atomic_sub_fetch(&ind_tbl->refcnt, 1, __ATOMIC_RELAXED);
if (!ret)
LIST_REMOVE(ind_tbl, next);
rte_rwlock_write_unlock(&priv->ind_tbls_lock);
if (ret)
return 1;
priv->obj_ops.ind_table_destroy(ind_tbl);
if (deref_rxqs)
mlx5_rxqs_deref(dev, ind_tbl->queues, ind_tbl->queues_n);
mlx5_free(ind_tbl);
return 0;
}
/**
* Verify the Rx Queue list is empty
*
* @param dev
* Pointer to Ethernet device.
*
* @return
* The number of object not released.
*/
int
mlx5_ind_table_obj_verify(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_ind_table_obj *ind_tbl;
int ret = 0;
rte_rwlock_read_lock(&priv->ind_tbls_lock);
LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
DRV_LOG(DEBUG,
"port %u indirection table obj %p still referenced",
dev->data->port_id, (void *)ind_tbl);
++ret;
}
rte_rwlock_read_unlock(&priv->ind_tbls_lock);
return ret;
}
/**
* Setup an indirection table structure fields.
*
* @param dev
* Pointer to Ethernet device.
* @param ind_table
* Indirection table to modify.
* @param ref_qs
* Whether to increment RxQ reference counters.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_ind_table_obj_setup(struct rte_eth_dev *dev,
struct mlx5_ind_table_obj *ind_tbl,
bool ref_qs)
{
struct mlx5_priv *priv = dev->data->dev_private;
uint32_t queues_n = ind_tbl->queues_n;
int ret;
const unsigned int n = rte_is_power_of_2(queues_n) ?
log2above(queues_n) :
log2above(priv->sh->dev_cap.ind_table_max_size);
if (ref_qs && mlx5_rxqs_ref(dev, ind_tbl->queues, queues_n) < 0) {
DRV_LOG(DEBUG, "Port %u invalid indirection table queues.",
dev->data->port_id);
return -rte_errno;
}
ret = priv->obj_ops.ind_table_new(dev, n, ind_tbl);
if (ret) {
DRV_LOG(DEBUG, "Port %u cannot create a new indirection table.",
dev->data->port_id);
if (ref_qs) {
int err = rte_errno;
mlx5_rxqs_deref(dev, ind_tbl->queues, queues_n);
rte_errno = err;
}
return ret;
}
__atomic_fetch_add(&ind_tbl->refcnt, 1, __ATOMIC_RELAXED);
return 0;
}
/**
* Create an indirection table.
*
* @param dev
* Pointer to Ethernet device.
* @param queues
* Queues entering in the indirection table.
* @param queues_n
* Number of queues in the array.
* @param standalone
* Indirection table for Standalone queue.
* @param ref_qs
* Whether to increment RxQ reference counters.
*
* @return
* The Verbs/DevX object initialized, NULL otherwise and rte_errno is set.
*/
struct mlx5_ind_table_obj *
mlx5_ind_table_obj_new(struct rte_eth_dev *dev, const uint16_t *queues,
uint32_t queues_n, bool standalone, bool ref_qs)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_ind_table_obj *ind_tbl;
int ret;
/*
* Allocate maximum queues for shared action as queue number
* maybe modified later.
*/
ind_tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*ind_tbl) +
(standalone ? priv->rxqs_n : queues_n) *
sizeof(uint16_t), 0, SOCKET_ID_ANY);
if (!ind_tbl) {
rte_errno = ENOMEM;
return NULL;
}
ind_tbl->queues_n = queues_n;
ind_tbl->queues = (uint16_t *)(ind_tbl + 1);
memcpy(ind_tbl->queues, queues, queues_n * sizeof(*queues));
ret = mlx5_ind_table_obj_setup(dev, ind_tbl, ref_qs);
if (ret < 0) {
mlx5_free(ind_tbl);
return NULL;
}
rte_rwlock_write_lock(&priv->ind_tbls_lock);
if (!standalone)
LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
else
LIST_INSERT_HEAD(&priv->standalone_ind_tbls, ind_tbl, next);
rte_rwlock_write_unlock(&priv->ind_tbls_lock);
return ind_tbl;
}
static int
mlx5_ind_table_obj_check_standalone(struct rte_eth_dev *dev __rte_unused,
struct mlx5_ind_table_obj *ind_tbl)
{
uint32_t refcnt;
refcnt = __atomic_load_n(&ind_tbl->refcnt, __ATOMIC_RELAXED);
if (refcnt <= 1)
return 0;
/*
* Modification of indirection tables having more than 1
* reference is unsupported.
*/
DRV_LOG(DEBUG,
"Port %u cannot modify indirection table %p (refcnt %u > 1).",
dev->data->port_id, (void *)ind_tbl, refcnt);
rte_errno = EINVAL;
return -rte_errno;
}
/**
* Modify an indirection table.
*
* @param dev
* Pointer to Ethernet device.
* @param ind_table
* Indirection table to modify.
* @param queues
* Queues replacement for the indirection table.
* @param queues_n
* Number of queues in the array.
* @param standalone
* Indirection table for Standalone queue.
* @param ref_new_qs
* Whether to increment new RxQ set reference counters.
* @param deref_old_qs
* Whether to decrement old RxQ set reference counters.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_ind_table_obj_modify(struct rte_eth_dev *dev,
struct mlx5_ind_table_obj *ind_tbl,
uint16_t *queues, const uint32_t queues_n,
bool standalone, bool ref_new_qs, bool deref_old_qs)
{
struct mlx5_priv *priv = dev->data->dev_private;
int ret;
const unsigned int n = rte_is_power_of_2(queues_n) ?
log2above(queues_n) :
log2above(priv->sh->dev_cap.ind_table_max_size);
MLX5_ASSERT(standalone);
RTE_SET_USED(standalone);
if (mlx5_ind_table_obj_check_standalone(dev, ind_tbl) < 0)
return -rte_errno;
if (ref_new_qs && mlx5_rxqs_ref(dev, queues, queues_n) < 0) {
DRV_LOG(DEBUG, "Port %u invalid indirection table queues.",
dev->data->port_id);
return -rte_errno;
}
MLX5_ASSERT(priv->obj_ops.ind_table_modify);
ret = priv->obj_ops.ind_table_modify(dev, n, queues, queues_n, ind_tbl);
if (ret) {
DRV_LOG(DEBUG, "Port %u cannot modify indirection table.",
dev->data->port_id);
if (ref_new_qs) {
int err = rte_errno;
mlx5_rxqs_deref(dev, queues, queues_n);
rte_errno = err;
}
return ret;
}
if (deref_old_qs)
mlx5_rxqs_deref(dev, ind_tbl->queues, ind_tbl->queues_n);
ind_tbl->queues_n = queues_n;
ind_tbl->queues = queues;
return 0;
}
/**
* Attach an indirection table to its queues.
*
* @param dev
* Pointer to Ethernet device.
* @param ind_table
* Indirection table to attach.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_ind_table_obj_attach(struct rte_eth_dev *dev,
struct mlx5_ind_table_obj *ind_tbl)
{
int ret;
ret = mlx5_ind_table_obj_modify(dev, ind_tbl, ind_tbl->queues,
ind_tbl->queues_n,
true /* standalone */,
true /* ref_new_qs */,
false /* deref_old_qs */);
if (ret != 0)
DRV_LOG(ERR, "Port %u could not modify indirect table obj %p",
dev->data->port_id, (void *)ind_tbl);
return ret;
}
/**
* Detach an indirection table from its queues.
*
* @param dev
* Pointer to Ethernet device.
* @param ind_table
* Indirection table to detach.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_ind_table_obj_detach(struct rte_eth_dev *dev,
struct mlx5_ind_table_obj *ind_tbl)
{
struct mlx5_priv *priv = dev->data->dev_private;
const unsigned int n = rte_is_power_of_2(ind_tbl->queues_n) ?
log2above(ind_tbl->queues_n) :
log2above(priv->sh->dev_cap.ind_table_max_size);
unsigned int i;
int ret;
ret = mlx5_ind_table_obj_check_standalone(dev, ind_tbl);
if (ret != 0)
return ret;
MLX5_ASSERT(priv->obj_ops.ind_table_modify);
ret = priv->obj_ops.ind_table_modify(dev, n, NULL, 0, ind_tbl);
if (ret != 0) {
DRV_LOG(ERR, "Port %u could not modify indirect table obj %p",
dev->data->port_id, (void *)ind_tbl);
return ret;
}
for (i = 0; i < ind_tbl->queues_n; i++)
mlx5_rxq_release(dev, ind_tbl->queues[i]);
return ret;
}
int
mlx5_hrxq_match_cb(void *tool_ctx __rte_unused, struct mlx5_list_entry *entry,
void *cb_ctx)
{
struct mlx5_flow_cb_ctx *ctx = cb_ctx;
struct mlx5_flow_rss_desc *rss_desc = ctx->data;
struct mlx5_hrxq *hrxq = container_of(entry, typeof(*hrxq), entry);
return (hrxq->rss_key_len != rss_desc->key_len ||
memcmp(hrxq->rss_key, rss_desc->key, rss_desc->key_len) ||
hrxq->hws_flags != rss_desc->hws_flags ||
hrxq->hash_fields != rss_desc->hash_fields ||
hrxq->ind_table->queues_n != rss_desc->queue_num ||
memcmp(hrxq->ind_table->queues, rss_desc->queue,
rss_desc->queue_num * sizeof(rss_desc->queue[0])));
}
/**
* Modify an Rx Hash queue configuration.
*
* @param dev
* Pointer to Ethernet device.
* @param hrxq
* Index to Hash Rx queue to modify.
* @param rss_key
* RSS key for the Rx hash queue.
* @param rss_key_len
* RSS key length.
* @param hash_fields
* Verbs protocol hash field to make the RSS on.
* @param queues
* Queues entering in hash queue. In case of empty hash_fields only the
* first queue index will be taken for the indirection table.
* @param queues_n
* Number of queues.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_hrxq_modify(struct rte_eth_dev *dev, uint32_t hrxq_idx,
const uint8_t *rss_key, uint32_t rss_key_len,
uint64_t hash_fields,
const uint16_t *queues, uint32_t queues_n)
{
int err;
struct mlx5_ind_table_obj *ind_tbl = NULL;
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_hrxq *hrxq =
mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx);
bool dev_started = !!dev->data->dev_started;
int ret;
if (!hrxq) {
rte_errno = EINVAL;
return -rte_errno;
}
/* validations */
if (hrxq->rss_key_len != rss_key_len) {
/* rss_key_len is fixed size 40 byte & not supposed to change */
rte_errno = EINVAL;
return -rte_errno;
}
queues_n = hash_fields ? queues_n : 1;
if (mlx5_ind_table_obj_match_queues(hrxq->ind_table,
queues, queues_n)) {
ind_tbl = hrxq->ind_table;
} else {
if (hrxq->standalone) {
/*
* Replacement of indirection table unsupported for
* standalone hrxq objects (used by shared RSS).
*/
rte_errno = ENOTSUP;
return -rte_errno;
}
ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n);
if (!ind_tbl)
ind_tbl = mlx5_ind_table_obj_new(dev, queues, queues_n,
hrxq->standalone,
dev_started);
}
if (!ind_tbl) {
rte_errno = ENOMEM;
return -rte_errno;
}
MLX5_ASSERT(priv->obj_ops.hrxq_modify);
ret = priv->obj_ops.hrxq_modify(dev, hrxq, rss_key,
hash_fields, ind_tbl);
if (ret) {
rte_errno = errno;
goto error;
}
if (ind_tbl != hrxq->ind_table) {
MLX5_ASSERT(!hrxq->standalone);
mlx5_ind_table_obj_release(dev, hrxq->ind_table, true);
hrxq->ind_table = ind_tbl;
}
hrxq->hash_fields = hash_fields;
memcpy(hrxq->rss_key, rss_key, rss_key_len);
return 0;
error:
err = rte_errno;
if (ind_tbl != hrxq->ind_table) {
MLX5_ASSERT(!hrxq->standalone);
mlx5_ind_table_obj_release(dev, ind_tbl, true);
}
rte_errno = err;
return -rte_errno;
}
static void
__mlx5_hrxq_remove(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq)
{
struct mlx5_priv *priv = dev->data->dev_private;
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
if (hrxq->hws_flags)
mlx5dr_action_destroy(hrxq->action);
else
mlx5_glue->destroy_flow_action(hrxq->action);
#endif
priv->obj_ops.hrxq_destroy(hrxq);
if (!hrxq->standalone) {
mlx5_ind_table_obj_release(dev, hrxq->ind_table,
hrxq->hws_flags ?
(!!dev->data->dev_started) : true);
}
mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq->idx);
}
/**
* Release the hash Rx queue.
*
* @param dev
* Pointer to Ethernet device.
* @param hrxq
* Index to Hash Rx queue to release.
*
* @param list
* mlx5 list pointer.
* @param entry
* Hash queue entry pointer.
*/
void
mlx5_hrxq_remove_cb(void *tool_ctx, struct mlx5_list_entry *entry)
{
struct rte_eth_dev *dev = tool_ctx;
struct mlx5_hrxq *hrxq = container_of(entry, typeof(*hrxq), entry);
__mlx5_hrxq_remove(dev, hrxq);
}
static struct mlx5_hrxq *
__mlx5_hrxq_create(struct rte_eth_dev *dev,
struct mlx5_flow_rss_desc *rss_desc)
{
struct mlx5_priv *priv = dev->data->dev_private;
const uint8_t *rss_key = rss_desc->key;
uint32_t rss_key_len = rss_desc->key_len;
bool standalone = !!rss_desc->shared_rss;
const uint16_t *queues =
standalone ? rss_desc->const_q : rss_desc->queue;
uint32_t queues_n = rss_desc->queue_num;
struct mlx5_hrxq *hrxq = NULL;
uint32_t hrxq_idx = 0;
struct mlx5_ind_table_obj *ind_tbl = rss_desc->ind_tbl;
int ret;
queues_n = rss_desc->hash_fields ? queues_n : 1;
if (!ind_tbl && !rss_desc->hws_flags)
ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n);
if (!ind_tbl)
ind_tbl = mlx5_ind_table_obj_new(dev, queues, queues_n,
standalone ||
rss_desc->hws_flags,
!!dev->data->dev_started);
if (!ind_tbl)
return NULL;
hrxq = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_HRXQ], &hrxq_idx);
if (!hrxq)
goto error;
hrxq->standalone = standalone;
hrxq->idx = hrxq_idx;
hrxq->ind_table = ind_tbl;
hrxq->rss_key_len = rss_key_len;
hrxq->hash_fields = rss_desc->hash_fields;
hrxq->hws_flags = rss_desc->hws_flags;
memcpy(hrxq->rss_key, rss_key, rss_key_len);
ret = priv->obj_ops.hrxq_new(dev, hrxq, rss_desc->tunnel);
if (ret < 0)
goto error;
return hrxq;
error:
if (!rss_desc->ind_tbl)
mlx5_ind_table_obj_release(dev, ind_tbl, true);
if (hrxq)
mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx);
return NULL;
}
struct mlx5_list_entry *
mlx5_hrxq_create_cb(void *tool_ctx, void *cb_ctx)
{
struct rte_eth_dev *dev = tool_ctx;
struct mlx5_flow_cb_ctx *ctx = cb_ctx;
struct mlx5_flow_rss_desc *rss_desc = ctx->data;
struct mlx5_hrxq *hrxq;
hrxq = __mlx5_hrxq_create(dev, rss_desc);
return hrxq ? &hrxq->entry : NULL;
}
struct mlx5_list_entry *
mlx5_hrxq_clone_cb(void *tool_ctx, struct mlx5_list_entry *entry,
void *cb_ctx __rte_unused)
{
struct rte_eth_dev *dev = tool_ctx;
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_hrxq *hrxq;
uint32_t hrxq_idx = 0;
hrxq = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_HRXQ], &hrxq_idx);
if (!hrxq)
return NULL;
memcpy(hrxq, entry, sizeof(*hrxq) + MLX5_RSS_HASH_KEY_LEN);
hrxq->idx = hrxq_idx;
return &hrxq->entry;
}
void
mlx5_hrxq_clone_free_cb(void *tool_ctx, struct mlx5_list_entry *entry)
{
struct rte_eth_dev *dev = tool_ctx;
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_hrxq *hrxq = container_of(entry, typeof(*hrxq), entry);
mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq->idx);
}
/**
* Get an Rx Hash queue.
*
* @param dev
* Pointer to Ethernet device.
* @param rss_desc
* RSS configuration for the Rx hash queue.
*
* @return
* An hash Rx queue on success.
*/
struct mlx5_hrxq *mlx5_hrxq_get(struct rte_eth_dev *dev,
struct mlx5_flow_rss_desc *rss_desc)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_hrxq *hrxq = NULL;
struct mlx5_list_entry *entry;
struct mlx5_flow_cb_ctx ctx = {
.data = rss_desc,
};
if (rss_desc->shared_rss) {
hrxq = __mlx5_hrxq_create(dev, rss_desc);
} else {
entry = mlx5_list_register(priv->hrxqs, &ctx);
if (!entry)
return NULL;
hrxq = container_of(entry, typeof(*hrxq), entry);
}
return hrxq;
}
/**
* Release the hash Rx queue.
*
* @param dev
* Pointer to Ethernet device.
* @param hrxq_idx
* Hash Rx queue to release.
*
* @return
* 1 while a reference on it exists, 0 when freed.
*/
int mlx5_hrxq_obj_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq)
{
struct mlx5_priv *priv = dev->data->dev_private;
if (!hrxq)
return 0;
if (!hrxq->standalone)
return mlx5_list_unregister(priv->hrxqs, &hrxq->entry);
__mlx5_hrxq_remove(dev, hrxq);
return 0;
}
/**
* Release the hash Rx queue with index.
*
* @param dev
* Pointer to Ethernet device.
* @param hrxq_idx
* Index to Hash Rx queue to release.
*
* @return
* 1 while a reference on it exists, 0 when freed.
*/
int mlx5_hrxq_release(struct rte_eth_dev *dev, uint32_t hrxq_idx)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_hrxq *hrxq;
hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx);
return mlx5_hrxq_obj_release(dev, hrxq);
}
/**
* Create a drop Rx Hash queue.
*
* @param dev
* Pointer to Ethernet device.
*
* @return
* The Verbs/DevX object initialized, NULL otherwise and rte_errno is set.
*/
struct mlx5_hrxq *
mlx5_drop_action_create(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_hrxq *hrxq = NULL;
int ret;
if (priv->drop_queue.hrxq)
return priv->drop_queue.hrxq;
hrxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*hrxq), 0, SOCKET_ID_ANY);
if (!hrxq) {
DRV_LOG(WARNING,
"Port %u cannot allocate memory for drop queue.",
dev->data->port_id);
rte_errno = ENOMEM;
goto error;
}
priv->drop_queue.hrxq = hrxq;
hrxq->ind_table = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*hrxq->ind_table),
0, SOCKET_ID_ANY);
if (!hrxq->ind_table) {
rte_errno = ENOMEM;
goto error;
}
ret = priv->obj_ops.drop_action_create(dev);
if (ret < 0)
goto error;
return hrxq;
error:
if (hrxq) {
if (hrxq->ind_table)
mlx5_free(hrxq->ind_table);
priv->drop_queue.hrxq = NULL;
mlx5_free(hrxq);
}
return NULL;
}
/**
* Release a drop hash Rx queue.
*
* @param dev
* Pointer to Ethernet device.
*/
void
mlx5_drop_action_destroy(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;
if (!priv->drop_queue.hrxq)
return;
priv->obj_ops.drop_action_destroy(dev);
mlx5_free(priv->drop_queue.rxq);
mlx5_free(hrxq->ind_table);
mlx5_free(hrxq);
priv->drop_queue.rxq = NULL;
priv->drop_queue.hrxq = NULL;
}
/**
* Verify the Rx Queue list is empty
*
* @param dev
* Pointer to Ethernet device.
*
* @return
* The number of object not released.
*/
uint32_t
mlx5_hrxq_verify(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
return mlx5_list_get_entry_num(priv->hrxqs);
}
/**
* Set the Rx queue timestamp conversion parameters
*
* @param[in] dev
* Pointer to the Ethernet device structure.
*/
void
mlx5_rxq_timestamp_set(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_dev_ctx_shared *sh = priv->sh;
unsigned int i;
for (i = 0; i != priv->rxqs_n; ++i) {
struct mlx5_rxq_data *data = mlx5_rxq_data_get(dev, i);
if (data == NULL)
continue;
data->sh = sh;
data->rt_timestamp = sh->dev_cap.rt_timestamp;
}
}
/**
* Validate given external RxQ rte_plow index, and get pointer to concurrent
* external RxQ object to map/unmap.
*
* @param[in] port_id
* The port identifier of the Ethernet device.
* @param[in] dpdk_idx
* Queue index in rte_flow.
*
* @return
* Pointer to concurrent external RxQ on success,
* NULL otherwise and rte_errno is set.
*/
static struct mlx5_external_rxq *
mlx5_external_rx_queue_get_validate(uint16_t port_id, uint16_t dpdk_idx)
{
struct rte_eth_dev *dev;
struct mlx5_priv *priv;
if (dpdk_idx < MLX5_EXTERNAL_RX_QUEUE_ID_MIN) {
DRV_LOG(ERR, "Queue index %u should be in range: [%u, %u].",
dpdk_idx, MLX5_EXTERNAL_RX_QUEUE_ID_MIN, UINT16_MAX);
rte_errno = EINVAL;
return NULL;
}
if (rte_eth_dev_is_valid_port(port_id) < 0) {
DRV_LOG(ERR, "There is no Ethernet device for port %u.",
port_id);
rte_errno = ENODEV;
return NULL;
}
dev = &rte_eth_devices[port_id];
priv = dev->data->dev_private;
if (!mlx5_imported_pd_and_ctx(priv->sh->cdev)) {
DRV_LOG(ERR, "Port %u "
"external RxQ isn't supported on local PD and CTX.",
port_id);
rte_errno = ENOTSUP;
return NULL;
}
if (!mlx5_devx_obj_ops_en(priv->sh)) {
DRV_LOG(ERR,
"Port %u external RxQ isn't supported by Verbs API.",
port_id);
rte_errno = ENOTSUP;
return NULL;
}
/*
* When user configures remote PD and CTX and device creates RxQ by
* DevX, external RxQs array is allocated.
*/
MLX5_ASSERT(priv->ext_rxqs != NULL);
return &priv->ext_rxqs[dpdk_idx - MLX5_EXTERNAL_RX_QUEUE_ID_MIN];
}
int
rte_pmd_mlx5_external_rx_queue_id_map(uint16_t port_id, uint16_t dpdk_idx,
uint32_t hw_idx)
{
struct mlx5_external_rxq *ext_rxq;
uint32_t unmapped = 0;
ext_rxq = mlx5_external_rx_queue_get_validate(port_id, dpdk_idx);
if (ext_rxq == NULL)
return -rte_errno;
if (!__atomic_compare_exchange_n(&ext_rxq->refcnt, &unmapped, 1, false,
__ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
if (ext_rxq->hw_id != hw_idx) {
DRV_LOG(ERR, "Port %u external RxQ index %u "
"is already mapped to HW index (requesting is "
"%u, existing is %u).",
port_id, dpdk_idx, hw_idx, ext_rxq->hw_id);
rte_errno = EEXIST;
return -rte_errno;
}
DRV_LOG(WARNING, "Port %u external RxQ index %u "
"is already mapped to the requested HW index (%u)",
port_id, dpdk_idx, hw_idx);
} else {
ext_rxq->hw_id = hw_idx;
DRV_LOG(DEBUG, "Port %u external RxQ index %u "
"is successfully mapped to the requested HW index (%u)",
port_id, dpdk_idx, hw_idx);
}
return 0;
}
int
rte_pmd_mlx5_external_rx_queue_id_unmap(uint16_t port_id, uint16_t dpdk_idx)
{
struct mlx5_external_rxq *ext_rxq;
uint32_t mapped = 1;
ext_rxq = mlx5_external_rx_queue_get_validate(port_id, dpdk_idx);
if (ext_rxq == NULL)
return -rte_errno;
if (ext_rxq->refcnt > 1) {
DRV_LOG(ERR, "Port %u external RxQ index %u still referenced.",
port_id, dpdk_idx);
rte_errno = EINVAL;
return -rte_errno;
}
if (!__atomic_compare_exchange_n(&ext_rxq->refcnt, &mapped, 0, false,
__ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
DRV_LOG(ERR, "Port %u external RxQ index %u doesn't exist.",
port_id, dpdk_idx);
rte_errno = EINVAL;
return -rte_errno;
}
DRV_LOG(DEBUG,
"Port %u external RxQ index %u is successfully unmapped.",
port_id, dpdk_idx);
return 0;
}