numam-dpdk/drivers/net/mlx5/mlx5_trigger.c

1488 lines
41 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2015 6WIND S.A.
* Copyright 2015 Mellanox Technologies, Ltd
*/
#include <unistd.h>
#include <rte_ether.h>
#include <ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <mlx5_malloc.h>
#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_rx.h"
#include "mlx5_tx.h"
#include "mlx5_utils.h"
#include "rte_pmd_mlx5.h"
/**
* Stop traffic on Tx queues.
*
* @param dev
* Pointer to Ethernet device structure.
*/
static void
mlx5_txq_stop(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
unsigned int i;
for (i = 0; i != priv->txqs_n; ++i)
mlx5_txq_release(dev, i);
}
/**
* Start traffic on Tx queues.
*
* @param dev
* Pointer to Ethernet device structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx5_txq_start(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
unsigned int i;
int ret;
for (i = 0; i != priv->txqs_n; ++i) {
struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
struct mlx5_txq_data *txq_data = &txq_ctrl->txq;
uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
if (!txq_ctrl)
continue;
if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
txq_alloc_elts(txq_ctrl);
MLX5_ASSERT(!txq_ctrl->obj);
txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
0, txq_ctrl->socket);
if (!txq_ctrl->obj) {
DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
"memory resources.", dev->data->port_id,
txq_data->idx);
rte_errno = ENOMEM;
goto error;
}
ret = priv->obj_ops.txq_obj_new(dev, i);
if (ret < 0) {
mlx5_free(txq_ctrl->obj);
txq_ctrl->obj = NULL;
goto error;
}
if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
txq_data->fcqs = mlx5_malloc(flags, size,
RTE_CACHE_LINE_SIZE,
txq_ctrl->socket);
if (!txq_data->fcqs) {
DRV_LOG(ERR, "Port %u Tx queue %u cannot "
"allocate memory (FCQ).",
dev->data->port_id, i);
rte_errno = ENOMEM;
goto error;
}
}
DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
dev->data->port_id, i, (void *)&txq_ctrl->obj);
LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
}
return 0;
error:
ret = rte_errno; /* Save rte_errno before cleanup. */
do {
mlx5_txq_release(dev, i);
} while (i-- != 0);
rte_errno = ret; /* Restore rte_errno. */
return -rte_errno;
}
/**
* Register Rx queue mempools and fill the Rx queue cache.
* This function tolerates repeated mempool registration.
*
* @param[in] rxq_ctrl
* Rx queue control data.
*
* @return
* 0 on success, (-1) on failure and rte_errno is set.
*/
static int
mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
{
struct rte_mempool *mp;
uint32_t s;
int ret = 0;
mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
/* MPRQ mempool is registered on creation, just fill the cache. */
common/mlx5: fix mempool registration Mempool registration was not correctly processing mempools with RTE_PKTMBUF_F_PINEND_EXT_BUF flag set ("pinned mempools" for short), because it is not known at registration time whether the mempool is a pktmbuf one, and its elements may not yet be initialized to analyze them. Attempts had been made to recognize such pools, but there was no robust solution, only the owner of a mempool (the application or a device) knows its type. This patch extends common/mlx5 registration code to accept a hint that the mempool is a pinned one and uses this capability from net/mlx5 driver. 1. Remove all code assuming pktmbuf pool type or trying to recognize the type of a pool. 2. Register pinned mempools used for Rx and their external memory on port start. Populate the MR cache with all their MRs. 3. Change Tx slow path logic as follows: 3.1. Search the mempool database for a memory region (MR) by the mbuf pool and its buffer address. 3.2. If not MR for the address is found for the mempool, and the mempool contains only pinned external buffers, perform the mempool registration of the mempool and its external pinned memory. 3.3. Fall back to using page-based MRs in other cases (for example, a buffer with externally attached memory, but not from a pinned mempool). Fixes: 690b2a88c2f7 ("common/mlx5: add mempool registration facilities") Fixes: fec28ca0e3a9 ("net/mlx5: support mempool registration") Signed-off-by: Dmitry Kozlyuk <dkozlyuk@nvidia.com> Reviewed-by: Matan Azrad <matan@nvidia.com> Reviewed-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
2021-11-19 14:31:56 +00:00
if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
return mlx5_mr_mempool_populate_cache(&rxq_ctrl->rxq.mr_ctrl,
rxq_ctrl->rxq.mprq_mp);
for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
common/mlx5: fix mempool registration Mempool registration was not correctly processing mempools with RTE_PKTMBUF_F_PINEND_EXT_BUF flag set ("pinned mempools" for short), because it is not known at registration time whether the mempool is a pktmbuf one, and its elements may not yet be initialized to analyze them. Attempts had been made to recognize such pools, but there was no robust solution, only the owner of a mempool (the application or a device) knows its type. This patch extends common/mlx5 registration code to accept a hint that the mempool is a pinned one and uses this capability from net/mlx5 driver. 1. Remove all code assuming pktmbuf pool type or trying to recognize the type of a pool. 2. Register pinned mempools used for Rx and their external memory on port start. Populate the MR cache with all their MRs. 3. Change Tx slow path logic as follows: 3.1. Search the mempool database for a memory region (MR) by the mbuf pool and its buffer address. 3.2. If not MR for the address is found for the mempool, and the mempool contains only pinned external buffers, perform the mempool registration of the mempool and its external pinned memory. 3.3. Fall back to using page-based MRs in other cases (for example, a buffer with externally attached memory, but not from a pinned mempool). Fixes: 690b2a88c2f7 ("common/mlx5: add mempool registration facilities") Fixes: fec28ca0e3a9 ("net/mlx5: support mempool registration") Signed-off-by: Dmitry Kozlyuk <dkozlyuk@nvidia.com> Reviewed-by: Matan Azrad <matan@nvidia.com> Reviewed-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
2021-11-19 14:31:56 +00:00
bool is_extmem;
mp = rxq_ctrl->rxq.rxseg[s].mp;
common/mlx5: fix mempool registration Mempool registration was not correctly processing mempools with RTE_PKTMBUF_F_PINEND_EXT_BUF flag set ("pinned mempools" for short), because it is not known at registration time whether the mempool is a pktmbuf one, and its elements may not yet be initialized to analyze them. Attempts had been made to recognize such pools, but there was no robust solution, only the owner of a mempool (the application or a device) knows its type. This patch extends common/mlx5 registration code to accept a hint that the mempool is a pinned one and uses this capability from net/mlx5 driver. 1. Remove all code assuming pktmbuf pool type or trying to recognize the type of a pool. 2. Register pinned mempools used for Rx and their external memory on port start. Populate the MR cache with all their MRs. 3. Change Tx slow path logic as follows: 3.1. Search the mempool database for a memory region (MR) by the mbuf pool and its buffer address. 3.2. If not MR for the address is found for the mempool, and the mempool contains only pinned external buffers, perform the mempool registration of the mempool and its external pinned memory. 3.3. Fall back to using page-based MRs in other cases (for example, a buffer with externally attached memory, but not from a pinned mempool). Fixes: 690b2a88c2f7 ("common/mlx5: add mempool registration facilities") Fixes: fec28ca0e3a9 ("net/mlx5: support mempool registration") Signed-off-by: Dmitry Kozlyuk <dkozlyuk@nvidia.com> Reviewed-by: Matan Azrad <matan@nvidia.com> Reviewed-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
2021-11-19 14:31:56 +00:00
is_extmem = (rte_pktmbuf_priv_flags(mp) &
RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) != 0;
ret = mlx5_mr_mempool_register(rxq_ctrl->sh->cdev, mp,
is_extmem);
if (ret < 0 && rte_errno != EEXIST)
return ret;
common/mlx5: fix mempool registration Mempool registration was not correctly processing mempools with RTE_PKTMBUF_F_PINEND_EXT_BUF flag set ("pinned mempools" for short), because it is not known at registration time whether the mempool is a pktmbuf one, and its elements may not yet be initialized to analyze them. Attempts had been made to recognize such pools, but there was no robust solution, only the owner of a mempool (the application or a device) knows its type. This patch extends common/mlx5 registration code to accept a hint that the mempool is a pinned one and uses this capability from net/mlx5 driver. 1. Remove all code assuming pktmbuf pool type or trying to recognize the type of a pool. 2. Register pinned mempools used for Rx and their external memory on port start. Populate the MR cache with all their MRs. 3. Change Tx slow path logic as follows: 3.1. Search the mempool database for a memory region (MR) by the mbuf pool and its buffer address. 3.2. If not MR for the address is found for the mempool, and the mempool contains only pinned external buffers, perform the mempool registration of the mempool and its external pinned memory. 3.3. Fall back to using page-based MRs in other cases (for example, a buffer with externally attached memory, but not from a pinned mempool). Fixes: 690b2a88c2f7 ("common/mlx5: add mempool registration facilities") Fixes: fec28ca0e3a9 ("net/mlx5: support mempool registration") Signed-off-by: Dmitry Kozlyuk <dkozlyuk@nvidia.com> Reviewed-by: Matan Azrad <matan@nvidia.com> Reviewed-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
2021-11-19 14:31:56 +00:00
ret = mlx5_mr_mempool_populate_cache(&rxq_ctrl->rxq.mr_ctrl,
mp);
if (ret < 0)
return ret;
}
return 0;
}
/**
* Stop traffic on Rx queues.
*
* @param dev
* Pointer to Ethernet device structure.
*/
static void
mlx5_rxq_stop(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
unsigned int i;
for (i = 0; i != priv->rxqs_n; ++i)
mlx5_rxq_release(dev, i);
}
static int
mlx5_rxq_ctrl_prepare(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
unsigned int idx)
{
int ret = 0;
if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
/*
* Pre-register the mempools. Regardless of whether
* the implicit registration is enabled or not,
* Rx mempool destruction is tracked to free MRs.
*/
if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
return -rte_errno;
ret = rxq_alloc_elts(rxq_ctrl);
if (ret)
return ret;
}
MLX5_ASSERT(!rxq_ctrl->obj);
rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
sizeof(*rxq_ctrl->obj), 0,
rxq_ctrl->socket);
if (!rxq_ctrl->obj) {
DRV_LOG(ERR, "Port %u Rx queue %u can't allocate resources.",
dev->data->port_id, idx);
rte_errno = ENOMEM;
return -rte_errno;
}
DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.", dev->data->port_id,
idx, (void *)&rxq_ctrl->obj);
return 0;
}
/**
* Start traffic on Rx queues.
*
* @param dev
* Pointer to Ethernet device structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx5_rxq_start(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
unsigned int i;
int ret = 0;
/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
if (mlx5_mprq_alloc_mp(dev)) {
/* Should not release Rx queues but return immediately. */
return -rte_errno;
}
DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
dev->data->port_id, priv->sh->device_attr.max_qp_wr);
DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
dev->data->port_id, priv->sh->device_attr.max_sge);
for (i = 0; i != priv->rxqs_n; ++i) {
struct mlx5_rxq_priv *rxq = mlx5_rxq_ref(dev, i);
struct mlx5_rxq_ctrl *rxq_ctrl;
if (rxq == NULL)
continue;
rxq_ctrl = rxq->ctrl;
if (!rxq_ctrl->started) {
if (mlx5_rxq_ctrl_prepare(dev, rxq_ctrl, i) < 0)
goto error;
LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
}
ret = priv->obj_ops.rxq_obj_new(rxq);
if (ret) {
mlx5_free(rxq_ctrl->obj);
rxq_ctrl->obj = NULL;
goto error;
}
rxq_ctrl->started = true;
}
return 0;
error:
ret = rte_errno; /* Save rte_errno before cleanup. */
do {
mlx5_rxq_release(dev, i);
} while (i-- != 0);
rte_errno = ret; /* Restore rte_errno. */
return -rte_errno;
}
/**
* Binds Tx queues to Rx queues for hairpin.
*
* Binds Tx queues to the target Rx queues.
*
* @param dev
* Pointer to Ethernet device structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
struct mlx5_txq_ctrl *txq_ctrl;
struct mlx5_rxq_priv *rxq;
struct mlx5_rxq_ctrl *rxq_ctrl;
struct mlx5_devx_obj *sq;
struct mlx5_devx_obj *rq;
unsigned int i;
int ret = 0;
bool need_auto = false;
uint16_t self_port = dev->data->port_id;
for (i = 0; i != priv->txqs_n; ++i) {
txq_ctrl = mlx5_txq_get(dev, i);
if (!txq_ctrl)
continue;
if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
txq_ctrl->hairpin_conf.peers[0].port != self_port) {
mlx5_txq_release(dev, i);
continue;
}
if (txq_ctrl->hairpin_conf.manual_bind) {
mlx5_txq_release(dev, i);
return 0;
}
need_auto = true;
mlx5_txq_release(dev, i);
}
if (!need_auto)
return 0;
for (i = 0; i != priv->txqs_n; ++i) {
txq_ctrl = mlx5_txq_get(dev, i);
if (!txq_ctrl)
continue;
/* Skip hairpin queues with other peer ports. */
if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
txq_ctrl->hairpin_conf.peers[0].port != self_port) {
mlx5_txq_release(dev, i);
continue;
}
if (!txq_ctrl->obj) {
rte_errno = ENOMEM;
DRV_LOG(ERR, "port %u no txq object found: %d",
dev->data->port_id, i);
mlx5_txq_release(dev, i);
return -rte_errno;
}
sq = txq_ctrl->obj->sq;
rxq = mlx5_rxq_get(dev, txq_ctrl->hairpin_conf.peers[0].queue);
if (rxq == NULL) {
mlx5_txq_release(dev, i);
rte_errno = EINVAL;
DRV_LOG(ERR, "port %u no rxq object found: %d",
dev->data->port_id,
txq_ctrl->hairpin_conf.peers[0].queue);
return -rte_errno;
}
rxq_ctrl = rxq->ctrl;
if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
rxq->hairpin_conf.peers[0].queue != i) {
rte_errno = ENOMEM;
DRV_LOG(ERR, "port %u Tx queue %d can't be binded to "
"Rx queue %d", dev->data->port_id,
i, txq_ctrl->hairpin_conf.peers[0].queue);
goto error;
}
rq = rxq_ctrl->obj->rq;
if (!rq) {
rte_errno = ENOMEM;
DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
dev->data->port_id,
txq_ctrl->hairpin_conf.peers[0].queue);
goto error;
}
sq_attr.state = MLX5_SQC_STATE_RDY;
sq_attr.sq_state = MLX5_SQC_STATE_RST;
sq_attr.hairpin_peer_rq = rq->id;
sq_attr.hairpin_peer_vhca =
priv->sh->cdev->config.hca_attr.vhca_id;
ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
if (ret)
goto error;
rq_attr.state = MLX5_SQC_STATE_RDY;
rq_attr.rq_state = MLX5_SQC_STATE_RST;
rq_attr.hairpin_peer_sq = sq->id;
rq_attr.hairpin_peer_vhca =
priv->sh->cdev->config.hca_attr.vhca_id;
ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
if (ret)
goto error;
/* Qs with auto-bind will be destroyed directly. */
rxq->hairpin_status = 1;
txq_ctrl->hairpin_status = 1;
mlx5_txq_release(dev, i);
}
return 0;
error:
mlx5_txq_release(dev, i);
return -rte_errno;
}
/*
* Fetch the peer queue's SW & HW information.
*
* @param dev
* Pointer to Ethernet device structure.
* @param peer_queue
* Index of the queue to fetch the information.
* @param current_info
* Pointer to the input peer information, not used currently.
* @param peer_info
* Pointer to the structure to store the information, output.
* @param direction
* Positive to get the RxQ information, zero to get the TxQ information.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
struct rte_hairpin_peer_info *current_info,
struct rte_hairpin_peer_info *peer_info,
uint32_t direction)
{
struct mlx5_priv *priv = dev->data->dev_private;
RTE_SET_USED(current_info);
if (dev->data->dev_started == 0) {
rte_errno = EBUSY;
DRV_LOG(ERR, "peer port %u is not started",
dev->data->port_id);
return -rte_errno;
}
/*
* Peer port used as egress. In the current design, hairpin Tx queue
* will be bound to the peer Rx queue. Indeed, only the information of
* peer Rx queue needs to be fetched.
*/
if (direction == 0) {
struct mlx5_txq_ctrl *txq_ctrl;
txq_ctrl = mlx5_txq_get(dev, peer_queue);
if (txq_ctrl == NULL) {
rte_errno = EINVAL;
DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
dev->data->port_id, peer_queue);
return -rte_errno;
}
if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
rte_errno = EINVAL;
DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
dev->data->port_id, peer_queue);
mlx5_txq_release(dev, peer_queue);
return -rte_errno;
}
if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
rte_errno = ENOMEM;
DRV_LOG(ERR, "port %u no Txq object found: %d",
dev->data->port_id, peer_queue);
mlx5_txq_release(dev, peer_queue);
return -rte_errno;
}
peer_info->qp_id = txq_ctrl->obj->sq->id;
peer_info->vhca_id = priv->sh->cdev->config.hca_attr.vhca_id;
/* 1-to-1 mapping, only the first one is used. */
peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
mlx5_txq_release(dev, peer_queue);
} else { /* Peer port used as ingress. */
struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, peer_queue);
struct mlx5_rxq_ctrl *rxq_ctrl;
if (rxq == NULL) {
rte_errno = EINVAL;
DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
dev->data->port_id, peer_queue);
return -rte_errno;
}
rxq_ctrl = rxq->ctrl;
if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
rte_errno = EINVAL;
DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
dev->data->port_id, peer_queue);
return -rte_errno;
}
if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
rte_errno = ENOMEM;
DRV_LOG(ERR, "port %u no Rxq object found: %d",
dev->data->port_id, peer_queue);
return -rte_errno;
}
peer_info->qp_id = rxq_ctrl->obj->rq->id;
peer_info->vhca_id = priv->sh->cdev->config.hca_attr.vhca_id;
peer_info->peer_q = rxq->hairpin_conf.peers[0].queue;
peer_info->tx_explicit = rxq->hairpin_conf.tx_explicit;
peer_info->manual_bind = rxq->hairpin_conf.manual_bind;
}
return 0;
}
/*
* Bind the hairpin queue with the peer HW information.
* This needs to be called twice both for Tx and Rx queues of a pair.
* If the queue is already bound, it is considered successful.
*
* @param dev
* Pointer to Ethernet device structure.
* @param cur_queue
* Index of the queue to change the HW configuration to bind.
* @param peer_info
* Pointer to information of the peer queue.
* @param direction
* Positive to configure the TxQ, zero to configure the RxQ.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
struct rte_hairpin_peer_info *peer_info,
uint32_t direction)
{
int ret = 0;
/*
* Consistency checking of the peer queue: opposite direction is used
* to get the peer queue info with ethdev port ID, no need to check.
*/
if (peer_info->peer_q != cur_queue) {
rte_errno = EINVAL;
DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
dev->data->port_id, cur_queue, peer_info->peer_q);
return -rte_errno;
}
if (direction != 0) {
struct mlx5_txq_ctrl *txq_ctrl;
struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
txq_ctrl = mlx5_txq_get(dev, cur_queue);
if (txq_ctrl == NULL) {
rte_errno = EINVAL;
DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
dev->data->port_id, cur_queue);
return -rte_errno;
}
if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
rte_errno = EINVAL;
DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
dev->data->port_id, cur_queue);
mlx5_txq_release(dev, cur_queue);
return -rte_errno;
}
if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
rte_errno = ENOMEM;
DRV_LOG(ERR, "port %u no Txq object found: %d",
dev->data->port_id, cur_queue);
mlx5_txq_release(dev, cur_queue);
return -rte_errno;
}
if (txq_ctrl->hairpin_status != 0) {
DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
dev->data->port_id, cur_queue);
mlx5_txq_release(dev, cur_queue);
return 0;
}
/*
* All queues' of one port consistency checking is done in the
* bind() function, and that is optional.
*/
if (peer_info->tx_explicit !=
txq_ctrl->hairpin_conf.tx_explicit) {
rte_errno = EINVAL;
DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
" mismatch", dev->data->port_id, cur_queue);
mlx5_txq_release(dev, cur_queue);
return -rte_errno;
}
if (peer_info->manual_bind !=
txq_ctrl->hairpin_conf.manual_bind) {
rte_errno = EINVAL;
DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
" mismatch", dev->data->port_id, cur_queue);
mlx5_txq_release(dev, cur_queue);
return -rte_errno;
}
sq_attr.state = MLX5_SQC_STATE_RDY;
sq_attr.sq_state = MLX5_SQC_STATE_RST;
sq_attr.hairpin_peer_rq = peer_info->qp_id;
sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
if (ret == 0)
txq_ctrl->hairpin_status = 1;
mlx5_txq_release(dev, cur_queue);
} else {
struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
struct mlx5_rxq_ctrl *rxq_ctrl;
struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
if (rxq == NULL) {
rte_errno = EINVAL;
DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
dev->data->port_id, cur_queue);
return -rte_errno;
}
rxq_ctrl = rxq->ctrl;
if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
rte_errno = EINVAL;
DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
dev->data->port_id, cur_queue);
return -rte_errno;
}
if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
rte_errno = ENOMEM;
DRV_LOG(ERR, "port %u no Rxq object found: %d",
dev->data->port_id, cur_queue);
return -rte_errno;
}
if (rxq->hairpin_status != 0) {
DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
dev->data->port_id, cur_queue);
return 0;
}
if (peer_info->tx_explicit !=
rxq->hairpin_conf.tx_explicit) {
rte_errno = EINVAL;
DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
" mismatch", dev->data->port_id, cur_queue);
return -rte_errno;
}
if (peer_info->manual_bind !=
rxq->hairpin_conf.manual_bind) {
rte_errno = EINVAL;
DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
" mismatch", dev->data->port_id, cur_queue);
return -rte_errno;
}
rq_attr.state = MLX5_SQC_STATE_RDY;
rq_attr.rq_state = MLX5_SQC_STATE_RST;
rq_attr.hairpin_peer_sq = peer_info->qp_id;
rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
if (ret == 0)
rxq->hairpin_status = 1;
}
return ret;
}
/*
* Unbind the hairpin queue and reset its HW configuration.
* This needs to be called twice both for Tx and Rx queues of a pair.
* If the queue is already unbound, it is considered successful.
*
* @param dev
* Pointer to Ethernet device structure.
* @param cur_queue
* Index of the queue to change the HW configuration to unbind.
* @param direction
* Positive to reset the TxQ, zero to reset the RxQ.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
uint32_t direction)
{
int ret = 0;
if (direction != 0) {
struct mlx5_txq_ctrl *txq_ctrl;
struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
txq_ctrl = mlx5_txq_get(dev, cur_queue);
if (txq_ctrl == NULL) {
rte_errno = EINVAL;
DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
dev->data->port_id, cur_queue);
return -rte_errno;
}
if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
rte_errno = EINVAL;
DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
dev->data->port_id, cur_queue);
mlx5_txq_release(dev, cur_queue);
return -rte_errno;
}
/* Already unbound, return success before obj checking. */
if (txq_ctrl->hairpin_status == 0) {
DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
dev->data->port_id, cur_queue);
mlx5_txq_release(dev, cur_queue);
return 0;
}
if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
rte_errno = ENOMEM;
DRV_LOG(ERR, "port %u no Txq object found: %d",
dev->data->port_id, cur_queue);
mlx5_txq_release(dev, cur_queue);
return -rte_errno;
}
sq_attr.state = MLX5_SQC_STATE_RST;
sq_attr.sq_state = MLX5_SQC_STATE_RST;
ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
if (ret == 0)
txq_ctrl->hairpin_status = 0;
mlx5_txq_release(dev, cur_queue);
} else {
struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
struct mlx5_rxq_ctrl *rxq_ctrl;
struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
if (rxq == NULL) {
rte_errno = EINVAL;
DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
dev->data->port_id, cur_queue);
return -rte_errno;
}
rxq_ctrl = rxq->ctrl;
if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
rte_errno = EINVAL;
DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
dev->data->port_id, cur_queue);
return -rte_errno;
}
if (rxq->hairpin_status == 0) {
DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
dev->data->port_id, cur_queue);
return 0;
}
if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
rte_errno = ENOMEM;
DRV_LOG(ERR, "port %u no Rxq object found: %d",
dev->data->port_id, cur_queue);
return -rte_errno;
}
rq_attr.state = MLX5_SQC_STATE_RST;
rq_attr.rq_state = MLX5_SQC_STATE_RST;
ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
if (ret == 0)
rxq->hairpin_status = 0;
}
return ret;
}
/*
* Bind the hairpin port pairs, from the Tx to the peer Rx.
* This function only supports to bind the Tx to one Rx.
*
* @param dev
* Pointer to Ethernet device structure.
* @param rx_port
* Port identifier of the Rx port.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
struct mlx5_priv *priv = dev->data->dev_private;
int ret = 0;
struct mlx5_txq_ctrl *txq_ctrl;
uint32_t i;
struct rte_hairpin_peer_info peer = {0xffffff};
struct rte_hairpin_peer_info cur;
const struct rte_eth_hairpin_conf *conf;
uint16_t num_q = 0;
uint16_t local_port = priv->dev_data->port_id;
uint32_t manual;
uint32_t explicit;
uint16_t rx_queue;
if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
rte_errno = ENODEV;
DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
return -rte_errno;
}
/*
* Before binding TxQ to peer RxQ, first round loop will be used for
* checking the queues' configuration consistency. This would be a
* little time consuming but better than doing the rollback.
*/
for (i = 0; i != priv->txqs_n; i++) {
txq_ctrl = mlx5_txq_get(dev, i);
if (txq_ctrl == NULL)
continue;
if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
mlx5_txq_release(dev, i);
continue;
}
/*
* All hairpin Tx queues of a single port that connected to the
* same peer Rx port should have the same "auto binding" and
* "implicit Tx flow" modes.
* Peer consistency checking will be done in per queue binding.
*/
conf = &txq_ctrl->hairpin_conf;
if (conf->peers[0].port == rx_port) {
if (num_q == 0) {
manual = conf->manual_bind;
explicit = conf->tx_explicit;
} else {
if (manual != conf->manual_bind ||
explicit != conf->tx_explicit) {
rte_errno = EINVAL;
DRV_LOG(ERR, "port %u queue %d mode"
" mismatch: %u %u, %u %u",
local_port, i, manual,
conf->manual_bind, explicit,
conf->tx_explicit);
mlx5_txq_release(dev, i);
return -rte_errno;
}
}
num_q++;
}
mlx5_txq_release(dev, i);
}
/* Once no queue is configured, success is returned directly. */
if (num_q == 0)
return ret;
/* All the hairpin TX queues need to be traversed again. */
for (i = 0; i != priv->txqs_n; i++) {
txq_ctrl = mlx5_txq_get(dev, i);
if (txq_ctrl == NULL)
continue;
if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
mlx5_txq_release(dev, i);
continue;
}
if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
mlx5_txq_release(dev, i);
continue;
}
rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
/*
* Fetch peer RxQ's information.
* No need to pass the information of the current queue.
*/
ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
NULL, &peer, 1);
if (ret != 0) {
mlx5_txq_release(dev, i);
goto error;
}
/* Accessing its own device, inside mlx5 PMD. */
ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
if (ret != 0) {
mlx5_txq_release(dev, i);
goto error;
}
/* Pass TxQ's information to peer RxQ and try binding. */
cur.peer_q = rx_queue;
cur.qp_id = txq_ctrl->obj->sq->id;
cur.vhca_id = priv->sh->cdev->config.hca_attr.vhca_id;
cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
/*
* In order to access another device in a proper way, RTE level
* private function is needed.
*/
ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
&cur, 0);
if (ret != 0) {
mlx5_txq_release(dev, i);
goto error;
}
mlx5_txq_release(dev, i);
}
return 0;
error:
/*
* Do roll-back process for the queues already bound.
* No need to check the return value of the queue unbind function.
*/
do {
/* No validation is needed here. */
txq_ctrl = mlx5_txq_get(dev, i);
if (txq_ctrl == NULL)
continue;
rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
mlx5_hairpin_queue_peer_unbind(dev, i, 1);
mlx5_txq_release(dev, i);
} while (i--);
return ret;
}
/*
* Unbind the hairpin port pair, HW configuration of both devices will be clear
* and status will be reset for all the queues used between them.
* This function only supports to unbind the Tx from one Rx.
*
* @param dev
* Pointer to Ethernet device structure.
* @param rx_port
* Port identifier of the Rx port.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_txq_ctrl *txq_ctrl;
uint32_t i;
int ret;
uint16_t cur_port = priv->dev_data->port_id;
if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
rte_errno = ENODEV;
DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
return -rte_errno;
}
for (i = 0; i != priv->txqs_n; i++) {
uint16_t rx_queue;
txq_ctrl = mlx5_txq_get(dev, i);
if (txq_ctrl == NULL)
continue;
if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
mlx5_txq_release(dev, i);
continue;
}
if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
mlx5_txq_release(dev, i);
continue;
}
/* Indeed, only the first used queue needs to be checked. */
if (txq_ctrl->hairpin_conf.manual_bind == 0) {
if (cur_port != rx_port) {
rte_errno = EINVAL;
DRV_LOG(ERR, "port %u and port %u are in"
" auto-bind mode", cur_port, rx_port);
mlx5_txq_release(dev, i);
return -rte_errno;
} else {
return 0;
}
}
rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
mlx5_txq_release(dev, i);
ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
if (ret) {
DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
rx_port, rx_queue);
return ret;
}
ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
if (ret) {
DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
cur_port, i);
return ret;
}
}
return 0;
}
/*
* Bind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
* @see mlx5_hairpin_bind_single_port()
*/
int
mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
{
int ret = 0;
uint16_t p, pp;
/*
* If the Rx port has no hairpin configuration with the current port,
* the binding will be skipped in the called function of single port.
* Device started status will be checked only before the queue
* information updating.
*/
if (rx_port == RTE_MAX_ETHPORTS) {
MLX5_ETH_FOREACH_DEV(p, dev->device) {
ret = mlx5_hairpin_bind_single_port(dev, p);
if (ret != 0)
goto unbind;
}
return ret;
} else {
return mlx5_hairpin_bind_single_port(dev, rx_port);
}
unbind:
MLX5_ETH_FOREACH_DEV(pp, dev->device)
if (pp < p)
mlx5_hairpin_unbind_single_port(dev, pp);
return ret;
}
/*
* Unbind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
* @see mlx5_hairpin_unbind_single_port()
*/
int
mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
{
int ret = 0;
uint16_t p;
if (rx_port == RTE_MAX_ETHPORTS)
MLX5_ETH_FOREACH_DEV(p, dev->device) {
ret = mlx5_hairpin_unbind_single_port(dev, p);
if (ret != 0)
return ret;
}
else
ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
return ret;
}
/*
* DPDK callback to get the hairpin peer ports list.
* This will return the actual number of peer ports and save the identifiers
* into the array (sorted, may be different from that when setting up the
* hairpin peer queues).
* The peer port ID could be the same as the port ID of the current device.
*
* @param dev
* Pointer to Ethernet device structure.
* @param peer_ports
* Pointer to array to save the port identifiers.
* @param len
* The length of the array.
* @param direction
* Current port to peer port direction.
* positive - current used as Tx to get all peer Rx ports.
* zero - current used as Rx to get all peer Tx ports.
*
* @return
* 0 or positive value on success, actual number of peer ports.
* a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
size_t len, uint32_t direction)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_txq_ctrl *txq_ctrl;
uint32_t i;
uint16_t pp;
uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
int ret = 0;
if (direction) {
for (i = 0; i < priv->txqs_n; i++) {
txq_ctrl = mlx5_txq_get(dev, i);
if (!txq_ctrl)
continue;
if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
mlx5_txq_release(dev, i);
continue;
}
pp = txq_ctrl->hairpin_conf.peers[0].port;
if (pp >= RTE_MAX_ETHPORTS) {
rte_errno = ERANGE;
mlx5_txq_release(dev, i);
DRV_LOG(ERR, "port %hu queue %u peer port "
"out of range %hu",
priv->dev_data->port_id, i, pp);
return -rte_errno;
}
bits[pp / 32] |= 1 << (pp % 32);
mlx5_txq_release(dev, i);
}
} else {
for (i = 0; i < priv->rxqs_n; i++) {
struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
struct mlx5_rxq_ctrl *rxq_ctrl;
if (rxq == NULL)
continue;
rxq_ctrl = rxq->ctrl;
if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN)
continue;
pp = rxq->hairpin_conf.peers[0].port;
if (pp >= RTE_MAX_ETHPORTS) {
rte_errno = ERANGE;
DRV_LOG(ERR, "port %hu queue %u peer port "
"out of range %hu",
priv->dev_data->port_id, i, pp);
return -rte_errno;
}
bits[pp / 32] |= 1 << (pp % 32);
}
}
for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
if (bits[i / 32] & (1 << (i % 32))) {
if ((size_t)ret >= len) {
rte_errno = E2BIG;
return -rte_errno;
}
peer_ports[ret++] = i;
}
}
return ret;
}
/**
* DPDK callback to start the device.
*
* Simulate device start by attaching all configured flows.
*
* @param dev
* Pointer to Ethernet device structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_dev_start(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
int ret;
int fine_inline;
DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
fine_inline = rte_mbuf_dynflag_lookup
(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
if (fine_inline >= 0)
rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
else
rte_net_mlx5_dynf_inline_mask = 0;
if (dev->data->nb_rx_queues > 0) {
ret = mlx5_dev_configure_rss_reta(dev);
if (ret) {
DRV_LOG(ERR, "port %u reta config failed: %s",
dev->data->port_id, strerror(rte_errno));
return -rte_errno;
}
}
ret = mlx5_txpp_start(dev);
if (ret) {
DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
dev->data->port_id, strerror(rte_errno));
goto error;
}
if ((priv->sh->cdev->config.devx && priv->config.dv_flow_en &&
priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
ret = priv->obj_ops.lb_dummy_queue_create(dev);
if (ret)
goto error;
}
ret = mlx5_txq_start(dev);
if (ret) {
DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
dev->data->port_id, strerror(rte_errno));
goto error;
}
if (priv->config.std_delay_drop || priv->config.hp_delay_drop) {
if (!priv->config.vf && !priv->config.sf &&
!priv->representor) {
ret = mlx5_get_flag_dropless_rq(dev);
if (ret < 0)
DRV_LOG(WARNING,
"port %u cannot query dropless flag",
dev->data->port_id);
else if (!ret)
DRV_LOG(WARNING,
"port %u dropless_rq OFF, no rearming",
dev->data->port_id);
} else {
DRV_LOG(DEBUG,
"port %u doesn't support dropless_rq flag",
dev->data->port_id);
}
}
ret = mlx5_rxq_start(dev);
if (ret) {
DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
dev->data->port_id, strerror(rte_errno));
goto error;
}
/*
* Such step will be skipped if there is no hairpin TX queue configured
* with RX peer queue from the same device.
*/
ret = mlx5_hairpin_auto_bind(dev);
if (ret) {
DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
dev->data->port_id, strerror(rte_errno));
goto error;
}
/* Set started flag here for the following steps like control flow. */
dev->data->dev_started = 1;
ret = mlx5_rx_intr_vec_enable(dev);
if (ret) {
DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
dev->data->port_id);
net/mlx5: fix Rx interrupts management This commit addresses various issues that may lead to undefined behavior when configuring Rx interrupts. While failure to create a Rx queue completion channel in rxq_ctrl_setup() prevents that queue from being created, existing queues still have theirs. Since the error handler disables dev_conf.intr_conf.rxq as well, subsequent calls to rxq_ctrl_setup() create Rx queues without interrupts. This leads to a scenario where not all Rx queues support interrupts; missing checks on the presence of completion channels may crash the application. Considering that the PMD is not supposed to disable user-provided configuration parameters (dev_conf.intr_conf.rxq), and that these can change for subsequent rxq_ctrl_setup() calls anyway, properly supporting a mixed mode where not all Rx queues have interrupts enabled is a better approach. To do so with a minimum set of changes, priv_intr_efd_enable() and priv_create_intr_vec() are first refactored as a single priv_rx_intr_vec_enable() function (same for their "disable" counterparts). Since they had to be used together, there was no point in keeping them separate. Remaining changes: - Always clean up before reconfiguring interrupts to avoid memory leaks. - Always clean up when closing the device. - Use malloc()/free() instead of their rte_*() counterparts since there is no need to store the vector in huge pages-backed memory. - Allow more Rx queues than the size of the event file descriptor array as long as Rx interrupts are not requested on all of them. - Properly clean up interrupt handle when disabling Rx interrupts (nb_efd and intr_vec reset to 0). - Check completion channel presence while toggling Rx interrupts on a given queue. Fixes: 3c7d44af252a ("net/mlx5: support user space Rx interrupt event") Cc: stable@dpdk.org Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
2017-06-14 11:49:17 +00:00
goto error;
}
mlx5_os_stats_init(dev);
/*
* Attach indirection table objects detached on port stop.
* They may be needed to create RSS in non-isolated mode.
*/
ret = mlx5_action_handle_attach(dev);
if (ret) {
DRV_LOG(ERR,
"port %u failed to attach indirect actions: %s",
dev->data->port_id, rte_strerror(rte_errno));
goto error;
}
net/mlx5: fix link status behavior This behavior is mixed between what should be handled by the application and what is under PMD responsibility. According to DPDK API: - link_update() should only query the link status [1] - link_set_{up,down}() should only set the link to the according status [1] - dev_{start,stop}() should enable/disable traffic reception/emission [2] On this PMD, the link status is retrieved from the net device associated owned by the Linux Kernel, it does not means that even when this interface is down, the PMD cannot send/receive traffic from the NIC those two information are unrelated, until the physical port is active and has a link, the PMD can receive/send traffic on the wire. According to DPDK API, calling the rte_eth_dev_start() even when the Linux interface link is down is then possible and allowed, as the traffic will flow between the DPDK application and the Physical port. This also means that a synchronization between the Linux interface and the DPDK application remains under the DPDK application responsibility. To handle such synchronization the application should behave as the following scheme, to start: rte_eth_get_link(port_id, &link); if (link.link_status == ETH_DOWN) rte_eth_dev_set_link_up(port_id); rte_eth_dev_start(port_id); Taking in account the possible returned values for each function. and to stop: rte_eth_dev_stop(port_id); rte_eth_dev_set_link_down(port_id); The application should also set the LSC interrupt callbacks to catch and behave accordingly when the administrator set the Linux device down/up. The same callbacks are called when the link on the medium falls/raise. [1] https://dpdk.org/browse/dpdk/tree/lib/librte_ether/rte_ethdev_core.h [2] https://dpdk.org/browse/dpdk/tree/lib/librte_ether/rte_ethdev.h#n1677 Fixes: c7bf62255edf ("net/mlx5: fix handling link status event") Fixes: e313ef4c2fe8 ("net/mlx5: fix link state on device start") Cc: stable@dpdk.org Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com> Acked-by: Yongseok Koh <yskoh@mellanox.com>
2018-03-12 13:43:18 +00:00
ret = mlx5_traffic_enable(dev);
if (ret) {
DRV_LOG(ERR, "port %u failed to set defaults flows",
dev->data->port_id);
goto error;
}
/* Set a mask and offset of dynamic metadata flows into Rx queues. */
mlx5_flow_rxq_dynf_metadata_set(dev);
/* Set flags and context to convert Rx timestamps. */
mlx5_rxq_timestamp_set(dev);
/* Set a mask and offset of scheduling on timestamp into Tx queues. */
mlx5_txq_dynf_timestamp_set(dev);
/*
* In non-cached mode, it only needs to start the default mreg copy
* action and no flow created by application exists anymore.
* But it is worth wrapping the interface for further usage.
*/
ret = mlx5_flow_start_default(dev);
net/mlx5: fix link status behavior This behavior is mixed between what should be handled by the application and what is under PMD responsibility. According to DPDK API: - link_update() should only query the link status [1] - link_set_{up,down}() should only set the link to the according status [1] - dev_{start,stop}() should enable/disable traffic reception/emission [2] On this PMD, the link status is retrieved from the net device associated owned by the Linux Kernel, it does not means that even when this interface is down, the PMD cannot send/receive traffic from the NIC those two information are unrelated, until the physical port is active and has a link, the PMD can receive/send traffic on the wire. According to DPDK API, calling the rte_eth_dev_start() even when the Linux interface link is down is then possible and allowed, as the traffic will flow between the DPDK application and the Physical port. This also means that a synchronization between the Linux interface and the DPDK application remains under the DPDK application responsibility. To handle such synchronization the application should behave as the following scheme, to start: rte_eth_get_link(port_id, &link); if (link.link_status == ETH_DOWN) rte_eth_dev_set_link_up(port_id); rte_eth_dev_start(port_id); Taking in account the possible returned values for each function. and to stop: rte_eth_dev_stop(port_id); rte_eth_dev_set_link_down(port_id); The application should also set the LSC interrupt callbacks to catch and behave accordingly when the administrator set the Linux device down/up. The same callbacks are called when the link on the medium falls/raise. [1] https://dpdk.org/browse/dpdk/tree/lib/librte_ether/rte_ethdev_core.h [2] https://dpdk.org/browse/dpdk/tree/lib/librte_ether/rte_ethdev.h#n1677 Fixes: c7bf62255edf ("net/mlx5: fix handling link status event") Fixes: e313ef4c2fe8 ("net/mlx5: fix link state on device start") Cc: stable@dpdk.org Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com> Acked-by: Yongseok Koh <yskoh@mellanox.com>
2018-03-12 13:43:18 +00:00
if (ret) {
DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
dev->data->port_id, strerror(rte_errno));
net/mlx5: fix link status behavior This behavior is mixed between what should be handled by the application and what is under PMD responsibility. According to DPDK API: - link_update() should only query the link status [1] - link_set_{up,down}() should only set the link to the according status [1] - dev_{start,stop}() should enable/disable traffic reception/emission [2] On this PMD, the link status is retrieved from the net device associated owned by the Linux Kernel, it does not means that even when this interface is down, the PMD cannot send/receive traffic from the NIC those two information are unrelated, until the physical port is active and has a link, the PMD can receive/send traffic on the wire. According to DPDK API, calling the rte_eth_dev_start() even when the Linux interface link is down is then possible and allowed, as the traffic will flow between the DPDK application and the Physical port. This also means that a synchronization between the Linux interface and the DPDK application remains under the DPDK application responsibility. To handle such synchronization the application should behave as the following scheme, to start: rte_eth_get_link(port_id, &link); if (link.link_status == ETH_DOWN) rte_eth_dev_set_link_up(port_id); rte_eth_dev_start(port_id); Taking in account the possible returned values for each function. and to stop: rte_eth_dev_stop(port_id); rte_eth_dev_set_link_down(port_id); The application should also set the LSC interrupt callbacks to catch and behave accordingly when the administrator set the Linux device down/up. The same callbacks are called when the link on the medium falls/raise. [1] https://dpdk.org/browse/dpdk/tree/lib/librte_ether/rte_ethdev_core.h [2] https://dpdk.org/browse/dpdk/tree/lib/librte_ether/rte_ethdev.h#n1677 Fixes: c7bf62255edf ("net/mlx5: fix handling link status event") Fixes: e313ef4c2fe8 ("net/mlx5: fix link state on device start") Cc: stable@dpdk.org Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com> Acked-by: Yongseok Koh <yskoh@mellanox.com>
2018-03-12 13:43:18 +00:00
goto error;
}
if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
dev->data->port_id, rte_strerror(rte_errno));
goto error;
}
rte_wmb();
net/mlx5: fix link status behavior This behavior is mixed between what should be handled by the application and what is under PMD responsibility. According to DPDK API: - link_update() should only query the link status [1] - link_set_{up,down}() should only set the link to the according status [1] - dev_{start,stop}() should enable/disable traffic reception/emission [2] On this PMD, the link status is retrieved from the net device associated owned by the Linux Kernel, it does not means that even when this interface is down, the PMD cannot send/receive traffic from the NIC those two information are unrelated, until the physical port is active and has a link, the PMD can receive/send traffic on the wire. According to DPDK API, calling the rte_eth_dev_start() even when the Linux interface link is down is then possible and allowed, as the traffic will flow between the DPDK application and the Physical port. This also means that a synchronization between the Linux interface and the DPDK application remains under the DPDK application responsibility. To handle such synchronization the application should behave as the following scheme, to start: rte_eth_get_link(port_id, &link); if (link.link_status == ETH_DOWN) rte_eth_dev_set_link_up(port_id); rte_eth_dev_start(port_id); Taking in account the possible returned values for each function. and to stop: rte_eth_dev_stop(port_id); rte_eth_dev_set_link_down(port_id); The application should also set the LSC interrupt callbacks to catch and behave accordingly when the administrator set the Linux device down/up. The same callbacks are called when the link on the medium falls/raise. [1] https://dpdk.org/browse/dpdk/tree/lib/librte_ether/rte_ethdev_core.h [2] https://dpdk.org/browse/dpdk/tree/lib/librte_ether/rte_ethdev.h#n1677 Fixes: c7bf62255edf ("net/mlx5: fix handling link status event") Fixes: e313ef4c2fe8 ("net/mlx5: fix link state on device start") Cc: stable@dpdk.org Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com> Acked-by: Yongseok Koh <yskoh@mellanox.com>
2018-03-12 13:43:18 +00:00
dev->tx_pkt_burst = mlx5_select_tx_function(dev);
dev->rx_pkt_burst = mlx5_select_rx_function(dev);
/* Enable datapath on secondary process. */
mlx5_mp_os_req_start_rxtx(dev);
if (rte_intr_fd_get(priv->sh->intr_handle) >= 0) {
priv->sh->port[priv->dev_port - 1].ih_port_id =
(uint32_t)dev->data->port_id;
} else {
DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
dev->data->port_id);
dev->data->dev_conf.intr_conf.lsc = 0;
dev->data->dev_conf.intr_conf.rmv = 0;
}
if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
(uint32_t)dev->data->port_id;
return 0;
error:
ret = rte_errno; /* Save rte_errno before cleanup. */
/* Rollback. */
dev->data->dev_started = 0;
mlx5_flow_stop_default(dev);
mlx5_traffic_disable(dev);
mlx5_txq_stop(dev);
mlx5_rxq_stop(dev);
if (priv->obj_ops.lb_dummy_queue_release)
priv->obj_ops.lb_dummy_queue_release(dev);
mlx5_txpp_stop(dev); /* Stop last. */
rte_errno = ret; /* Restore rte_errno. */
return -rte_errno;
}
/**
* DPDK callback to stop the device.
*
* Simulate device stop by detaching all configured flows.
*
* @param dev
* Pointer to Ethernet device structure.
*/
int
mlx5_dev_stop(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
dev->data->dev_started = 0;
/* Prevent crashes when queues are still in use. */
dev->rx_pkt_burst = rte_eth_pkt_burst_dummy;
dev->tx_pkt_burst = rte_eth_pkt_burst_dummy;
rte_wmb();
/* Disable datapath on secondary process. */
mlx5_mp_os_req_stop_rxtx(dev);
rte_delay_us_sleep(1000 * priv->rxqs_n);
DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
mlx5_flow_stop_default(dev);
/* Control flows for default traffic can be removed firstly. */
mlx5_traffic_disable(dev);
/* All RX queue flags will be cleared in the flush interface. */
mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
mlx5_flow_meter_rxq_flush(dev);
mlx5_action_handle_detach(dev);
mlx5_rx_intr_vec_disable(dev);
priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
mlx5_txq_stop(dev);
mlx5_rxq_stop(dev);
if (priv->obj_ops.lb_dummy_queue_release)
priv->obj_ops.lb_dummy_queue_release(dev);
mlx5_txpp_stop(dev);
return 0;
}
/**
* Enable traffic flows configured by control plane
*
* @param dev
* Pointer to Ethernet device private data.
* @param dev
* Pointer to Ethernet device structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_traffic_enable(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct rte_flow_item_eth bcast = {
.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
};
struct rte_flow_item_eth ipv6_multi_spec = {
.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
};
struct rte_flow_item_eth ipv6_multi_mask = {
.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
};
struct rte_flow_item_eth unicast = {
.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
};
struct rte_flow_item_eth unicast_mask = {
.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
};
const unsigned int vlan_filter_n = priv->vlan_filter_n;
const struct rte_ether_addr cmp = {
.addr_bytes = "\x00\x00\x00\x00\x00\x00",
};
unsigned int i;
unsigned int j;
int ret;
/*
* Hairpin txq default flow should be created no matter if it is
* isolation mode. Or else all the packets to be sent will be sent
* out directly without the TX flow actions, e.g. encapsulation.
*/
for (i = 0; i != priv->txqs_n; ++i) {
struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
if (!txq_ctrl)
continue;
/* Only Tx implicit mode requires the default Tx flow. */
if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
txq_ctrl->hairpin_conf.tx_explicit == 0 &&
txq_ctrl->hairpin_conf.peers[0].port ==
priv->dev_data->port_id) {
ret = mlx5_ctrl_flow_source_queue(dev, i);
if (ret) {
mlx5_txq_release(dev, i);
goto error;
}
}
if (priv->config.dv_esw_en) {
if (mlx5_flow_create_devx_sq_miss_flow(dev, i) == 0) {
DRV_LOG(ERR,
"Port %u Tx queue %u SQ create representor devx default miss rule failed.",
dev->data->port_id, i);
goto error;
}
}
mlx5_txq_release(dev, i);
}
if (priv->config.dv_esw_en) {
if (mlx5_flow_create_esw_table_zero_flow(dev))
priv->fdb_def_rule = 1;
else
DRV_LOG(INFO, "port %u FDB default rule cannot be"
" configured - only Eswitch group 0 flows are"
" supported.", dev->data->port_id);
}
if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
ret = mlx5_flow_lacp_miss(dev);
if (ret)
DRV_LOG(INFO, "port %u LACP rule cannot be created - "
"forward LACP to kernel.", dev->data->port_id);
else
DRV_LOG(INFO, "LACP traffic will be missed in port %u."
, dev->data->port_id);
}
if (priv->isolated)
return 0;
if (dev->data->promiscuous) {
struct rte_flow_item_eth promisc = {
.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
.type = 0,
};
ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
if (ret)
goto error;
}
if (dev->data->all_multicast) {
struct rte_flow_item_eth multicast = {
.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
.type = 0,
};
ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
if (ret)
goto error;
} else {
/* Add broadcast/multicast flows. */
for (i = 0; i != vlan_filter_n; ++i) {
uint16_t vlan = priv->vlan_filter[i];
struct rte_flow_item_vlan vlan_spec = {
.tci = rte_cpu_to_be_16(vlan),
};
struct rte_flow_item_vlan vlan_mask =
rte_flow_item_vlan_mask;
ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
&vlan_spec, &vlan_mask);
if (ret)
goto error;
ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
&ipv6_multi_mask,
&vlan_spec, &vlan_mask);
if (ret)
goto error;
}
if (!vlan_filter_n) {
ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
if (ret)
goto error;
ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
&ipv6_multi_mask);
if (ret) {
/* Do not fail on IPv6 broadcast creation failure. */
DRV_LOG(WARNING,
"IPv6 broadcast is not supported");
ret = 0;
}
}
}
/* Add MAC address flows. */
for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
if (!memcmp(mac, &cmp, sizeof(*mac)))
continue;
memcpy(&unicast.dst.addr_bytes,
mac->addr_bytes,
net: add rte prefix to ether defines Add 'RTE_' prefix to defines: - rename ETHER_ADDR_LEN as RTE_ETHER_ADDR_LEN. - rename ETHER_TYPE_LEN as RTE_ETHER_TYPE_LEN. - rename ETHER_CRC_LEN as RTE_ETHER_CRC_LEN. - rename ETHER_HDR_LEN as RTE_ETHER_HDR_LEN. - rename ETHER_MIN_LEN as RTE_ETHER_MIN_LEN. - rename ETHER_MAX_LEN as RTE_ETHER_MAX_LEN. - rename ETHER_MTU as RTE_ETHER_MTU. - rename ETHER_MAX_VLAN_FRAME_LEN as RTE_ETHER_MAX_VLAN_FRAME_LEN. - rename ETHER_MAX_VLAN_ID as RTE_ETHER_MAX_VLAN_ID. - rename ETHER_MAX_JUMBO_FRAME_LEN as RTE_ETHER_MAX_JUMBO_FRAME_LEN. - rename ETHER_MIN_MTU as RTE_ETHER_MIN_MTU. - rename ETHER_LOCAL_ADMIN_ADDR as RTE_ETHER_LOCAL_ADMIN_ADDR. - rename ETHER_GROUP_ADDR as RTE_ETHER_GROUP_ADDR. - rename ETHER_TYPE_IPv4 as RTE_ETHER_TYPE_IPv4. - rename ETHER_TYPE_IPv6 as RTE_ETHER_TYPE_IPv6. - rename ETHER_TYPE_ARP as RTE_ETHER_TYPE_ARP. - rename ETHER_TYPE_VLAN as RTE_ETHER_TYPE_VLAN. - rename ETHER_TYPE_RARP as RTE_ETHER_TYPE_RARP. - rename ETHER_TYPE_QINQ as RTE_ETHER_TYPE_QINQ. - rename ETHER_TYPE_ETAG as RTE_ETHER_TYPE_ETAG. - rename ETHER_TYPE_1588 as RTE_ETHER_TYPE_1588. - rename ETHER_TYPE_SLOW as RTE_ETHER_TYPE_SLOW. - rename ETHER_TYPE_TEB as RTE_ETHER_TYPE_TEB. - rename ETHER_TYPE_LLDP as RTE_ETHER_TYPE_LLDP. - rename ETHER_TYPE_MPLS as RTE_ETHER_TYPE_MPLS. - rename ETHER_TYPE_MPLSM as RTE_ETHER_TYPE_MPLSM. - rename ETHER_VXLAN_HLEN as RTE_ETHER_VXLAN_HLEN. - rename ETHER_ADDR_FMT_SIZE as RTE_ETHER_ADDR_FMT_SIZE. - rename VXLAN_GPE_TYPE_IPV4 as RTE_VXLAN_GPE_TYPE_IPV4. - rename VXLAN_GPE_TYPE_IPV6 as RTE_VXLAN_GPE_TYPE_IPV6. - rename VXLAN_GPE_TYPE_ETH as RTE_VXLAN_GPE_TYPE_ETH. - rename VXLAN_GPE_TYPE_NSH as RTE_VXLAN_GPE_TYPE_NSH. - rename VXLAN_GPE_TYPE_MPLS as RTE_VXLAN_GPE_TYPE_MPLS. - rename VXLAN_GPE_TYPE_GBP as RTE_VXLAN_GPE_TYPE_GBP. - rename VXLAN_GPE_TYPE_VBNG as RTE_VXLAN_GPE_TYPE_VBNG. - rename ETHER_VXLAN_GPE_HLEN as RTE_ETHER_VXLAN_GPE_HLEN. Do not update the command line library to avoid adding a dependency to librte_net. Signed-off-by: Olivier Matz <olivier.matz@6wind.com> Reviewed-by: Stephen Hemminger <stephen@networkplumber.org> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com> Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
2019-05-21 16:13:05 +00:00
RTE_ETHER_ADDR_LEN);
for (j = 0; j != vlan_filter_n; ++j) {
uint16_t vlan = priv->vlan_filter[j];
struct rte_flow_item_vlan vlan_spec = {
.tci = rte_cpu_to_be_16(vlan),
};
struct rte_flow_item_vlan vlan_mask =
rte_flow_item_vlan_mask;
ret = mlx5_ctrl_flow_vlan(dev, &unicast,
&unicast_mask,
&vlan_spec,
&vlan_mask);
if (ret)
goto error;
}
if (!vlan_filter_n) {
ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
if (ret)
goto error;
}
}
return 0;
error:
ret = rte_errno; /* Save rte_errno before cleanup. */
mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
rte_errno = ret; /* Restore rte_errno. */
return -rte_errno;
}
/**
* Disable traffic flows configured by control plane
*
* @param dev
* Pointer to Ethernet device private data.
*/
void
mlx5_traffic_disable(struct rte_eth_dev *dev)
{
mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
}
/**
* Restart traffic flows configured by control plane
*
* @param dev
* Pointer to Ethernet device private data.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_traffic_restart(struct rte_eth_dev *dev)
{
if (dev->data->dev_started) {
mlx5_traffic_disable(dev);
return mlx5_traffic_enable(dev);
}
return 0;
}