2018-01-29 13:11:30 +00:00
|
|
|
/* SPDX-License-Identifier: BSD-3-Clause
|
|
|
|
* Copyright 2015 6WIND S.A.
|
2018-03-20 19:20:35 +00:00
|
|
|
* Copyright 2015 Mellanox Technologies, Ltd
|
2015-10-30 18:52:33 +00:00
|
|
|
*/
|
2018-01-29 13:11:30 +00:00
|
|
|
|
2017-10-09 14:44:43 +00:00
|
|
|
#include <unistd.h>
|
2015-10-30 18:52:33 +00:00
|
|
|
|
|
|
|
#include <rte_ether.h>
|
2018-01-22 00:16:22 +00:00
|
|
|
#include <rte_ethdev_driver.h>
|
2015-10-30 18:57:23 +00:00
|
|
|
#include <rte_interrupts.h>
|
|
|
|
#include <rte_alarm.h>
|
2015-10-30 18:52:33 +00:00
|
|
|
|
|
|
|
#include "mlx5.h"
|
|
|
|
#include "mlx5_rxtx.h"
|
|
|
|
#include "mlx5_utils.h"
|
|
|
|
|
2018-03-05 12:21:01 +00:00
|
|
|
/**
|
|
|
|
* Stop traffic on Tx queues.
|
|
|
|
*
|
|
|
|
* @param dev
|
|
|
|
* Pointer to Ethernet device structure.
|
|
|
|
*/
|
2017-10-09 14:44:48 +00:00
|
|
|
static void
|
2018-03-05 12:21:04 +00:00
|
|
|
mlx5_txq_stop(struct rte_eth_dev *dev)
|
2017-10-09 14:44:48 +00:00
|
|
|
{
|
2019-02-21 09:29:14 +00:00
|
|
|
struct mlx5_priv *priv = dev->data->dev_private;
|
2017-10-09 14:44:48 +00:00
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
for (i = 0; i != priv->txqs_n; ++i)
|
2018-03-05 12:21:04 +00:00
|
|
|
mlx5_txq_release(dev, i);
|
2017-10-09 14:44:48 +00:00
|
|
|
}
|
|
|
|
|
2018-03-05 12:21:01 +00:00
|
|
|
/**
|
|
|
|
* Start traffic on Tx queues.
|
|
|
|
*
|
|
|
|
* @param dev
|
|
|
|
* Pointer to Ethernet device structure.
|
|
|
|
*
|
|
|
|
* @return
|
2018-03-05 12:21:06 +00:00
|
|
|
* 0 on success, a negative errno value otherwise and rte_errno is set.
|
2018-03-05 12:21:01 +00:00
|
|
|
*/
|
2017-10-09 14:44:48 +00:00
|
|
|
static int
|
2018-03-05 12:21:04 +00:00
|
|
|
mlx5_txq_start(struct rte_eth_dev *dev)
|
2017-10-09 14:44:48 +00:00
|
|
|
{
|
2019-02-21 09:29:14 +00:00
|
|
|
struct mlx5_priv *priv = dev->data->dev_private;
|
2017-10-09 14:44:48 +00:00
|
|
|
unsigned int i;
|
2018-03-05 12:21:06 +00:00
|
|
|
int ret;
|
2017-10-09 14:44:48 +00:00
|
|
|
|
|
|
|
for (i = 0; i != priv->txqs_n; ++i) {
|
2018-03-05 12:21:04 +00:00
|
|
|
struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
|
2017-10-09 14:44:48 +00:00
|
|
|
|
|
|
|
if (!txq_ctrl)
|
|
|
|
continue;
|
|
|
|
txq_alloc_elts(txq_ctrl);
|
2018-03-05 12:21:04 +00:00
|
|
|
txq_ctrl->ibv = mlx5_txq_ibv_new(dev, i);
|
2017-10-09 14:44:48 +00:00
|
|
|
if (!txq_ctrl->ibv) {
|
2018-03-05 12:21:06 +00:00
|
|
|
rte_errno = ENOMEM;
|
2017-10-09 14:44:48 +00:00
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
}
|
2018-03-05 12:21:06 +00:00
|
|
|
return 0;
|
2017-10-09 14:44:48 +00:00
|
|
|
error:
|
2018-03-05 12:21:06 +00:00
|
|
|
ret = rte_errno; /* Save rte_errno before cleanup. */
|
2018-07-23 20:57:04 +00:00
|
|
|
do {
|
|
|
|
mlx5_txq_release(dev, i);
|
|
|
|
} while (i-- != 0);
|
2018-03-05 12:21:06 +00:00
|
|
|
rte_errno = ret; /* Restore rte_errno. */
|
|
|
|
return -rte_errno;
|
2017-10-09 14:44:48 +00:00
|
|
|
}
|
|
|
|
|
2018-03-05 12:21:01 +00:00
|
|
|
/**
|
|
|
|
* Stop traffic on Rx queues.
|
|
|
|
*
|
|
|
|
* @param dev
|
|
|
|
* Pointer to Ethernet device structure.
|
|
|
|
*/
|
2017-10-09 14:44:49 +00:00
|
|
|
static void
|
2018-03-05 12:21:04 +00:00
|
|
|
mlx5_rxq_stop(struct rte_eth_dev *dev)
|
2017-10-09 14:44:49 +00:00
|
|
|
{
|
2019-02-21 09:29:14 +00:00
|
|
|
struct mlx5_priv *priv = dev->data->dev_private;
|
2017-10-09 14:44:49 +00:00
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
for (i = 0; i != priv->rxqs_n; ++i)
|
2018-03-05 12:21:04 +00:00
|
|
|
mlx5_rxq_release(dev, i);
|
2017-10-09 14:44:49 +00:00
|
|
|
}
|
|
|
|
|
2018-03-05 12:21:01 +00:00
|
|
|
/**
|
|
|
|
* Start traffic on Rx queues.
|
|
|
|
*
|
|
|
|
* @param dev
|
|
|
|
* Pointer to Ethernet device structure.
|
|
|
|
*
|
|
|
|
* @return
|
2018-03-05 12:21:06 +00:00
|
|
|
* 0 on success, a negative errno value otherwise and rte_errno is set.
|
2018-03-05 12:21:01 +00:00
|
|
|
*/
|
2017-10-09 14:44:49 +00:00
|
|
|
static int
|
2018-03-05 12:21:04 +00:00
|
|
|
mlx5_rxq_start(struct rte_eth_dev *dev)
|
2017-10-09 14:44:49 +00:00
|
|
|
{
|
2019-02-21 09:29:14 +00:00
|
|
|
struct mlx5_priv *priv = dev->data->dev_private;
|
2017-10-09 14:44:49 +00:00
|
|
|
unsigned int i;
|
|
|
|
int ret = 0;
|
|
|
|
|
2018-05-09 11:13:50 +00:00
|
|
|
/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
|
2018-07-23 20:57:04 +00:00
|
|
|
if (mlx5_mprq_alloc_mp(dev)) {
|
|
|
|
/* Should not release Rx queues but return immediately. */
|
|
|
|
return -rte_errno;
|
|
|
|
}
|
2017-10-09 14:44:49 +00:00
|
|
|
for (i = 0; i != priv->rxqs_n; ++i) {
|
2018-03-05 12:21:04 +00:00
|
|
|
struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
|
net/mlx5: add new memory region support
This is the new design of Memory Region (MR) for mlx PMD, in order to:
- Accommodate the new memory hotplug model.
- Support non-contiguous Mempool.
There are multiple layers for MR search.
L0 is to look up the last-hit entry which is pointed by mr_ctrl->mru (Most
Recently Used). If L0 misses, L1 is to look up the address in a fixed-sized
array by linear search. L0/L1 is in an inline function -
mlx5_mr_lookup_cache().
If L1 misses, the bottom-half function is called to look up the address
from the bigger local cache of the queue. This is L2 - mlx5_mr_addr2mr_bh()
and it is not an inline function. Data structure for L2 is the Binary Tree.
If L2 misses, the search falls into the slowest path which takes locks in
order to access global device cache (priv->mr.cache) which is also a B-tree
and caches the original MR list (priv->mr.mr_list) of the device. Unless
the global cache is overflowed, it is all-inclusive of the MR list. This is
L3 - mlx5_mr_lookup_dev(). The size of the L3 cache table is limited and
can't be expanded on the fly due to deadlock. Refer to the comments in the
code for the details - mr_lookup_dev(). If L3 is overflowed, the list will
have to be searched directly bypassing the cache although it is slower.
If L3 misses, a new MR for the address should be created -
mlx5_mr_create(). When it creates a new MR, it tries to register adjacent
memsegs as much as possible which are virtually contiguous around the
address. This must take two locks - memory_hotplug_lock and
priv->mr.rwlock. Due to memory_hotplug_lock, there can't be any
allocation/free of memory inside.
In the free callback of the memory hotplug event, freed space is searched
from the MR list and corresponding bits are cleared from the bitmap of MRs.
This can fragment a MR and the MR will have multiple search entries in the
caches. Once there's a change by the event, the global cache must be
rebuilt and all the per-queue caches will be flushed as well. If memory is
frequently freed in run-time, that may cause jitter on dataplane processing
in the worst case by incurring MR cache flush and rebuild. But, it would be
the least probable scenario.
To guarantee the most optimal performance, it is highly recommended to use
an EAL option - '--socket-mem'. Then, the reserved memory will be pinned
and won't be freed dynamically. And it is also recommended to configure
per-lcore cache of Mempool. Even though there're many MRs for a device or
MRs are highly fragmented, the cache of Mempool will be much helpful to
reduce misses on per-queue caches anyway.
'--legacy-mem' is also supported.
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
2018-05-09 11:09:04 +00:00
|
|
|
struct rte_mempool *mp;
|
2017-10-09 14:44:49 +00:00
|
|
|
|
|
|
|
if (!rxq_ctrl)
|
|
|
|
continue;
|
net/mlx5: add new memory region support
This is the new design of Memory Region (MR) for mlx PMD, in order to:
- Accommodate the new memory hotplug model.
- Support non-contiguous Mempool.
There are multiple layers for MR search.
L0 is to look up the last-hit entry which is pointed by mr_ctrl->mru (Most
Recently Used). If L0 misses, L1 is to look up the address in a fixed-sized
array by linear search. L0/L1 is in an inline function -
mlx5_mr_lookup_cache().
If L1 misses, the bottom-half function is called to look up the address
from the bigger local cache of the queue. This is L2 - mlx5_mr_addr2mr_bh()
and it is not an inline function. Data structure for L2 is the Binary Tree.
If L2 misses, the search falls into the slowest path which takes locks in
order to access global device cache (priv->mr.cache) which is also a B-tree
and caches the original MR list (priv->mr.mr_list) of the device. Unless
the global cache is overflowed, it is all-inclusive of the MR list. This is
L3 - mlx5_mr_lookup_dev(). The size of the L3 cache table is limited and
can't be expanded on the fly due to deadlock. Refer to the comments in the
code for the details - mr_lookup_dev(). If L3 is overflowed, the list will
have to be searched directly bypassing the cache although it is slower.
If L3 misses, a new MR for the address should be created -
mlx5_mr_create(). When it creates a new MR, it tries to register adjacent
memsegs as much as possible which are virtually contiguous around the
address. This must take two locks - memory_hotplug_lock and
priv->mr.rwlock. Due to memory_hotplug_lock, there can't be any
allocation/free of memory inside.
In the free callback of the memory hotplug event, freed space is searched
from the MR list and corresponding bits are cleared from the bitmap of MRs.
This can fragment a MR and the MR will have multiple search entries in the
caches. Once there's a change by the event, the global cache must be
rebuilt and all the per-queue caches will be flushed as well. If memory is
frequently freed in run-time, that may cause jitter on dataplane processing
in the worst case by incurring MR cache flush and rebuild. But, it would be
the least probable scenario.
To guarantee the most optimal performance, it is highly recommended to use
an EAL option - '--socket-mem'. Then, the reserved memory will be pinned
and won't be freed dynamically. And it is also recommended to configure
per-lcore cache of Mempool. Even though there're many MRs for a device or
MRs are highly fragmented, the cache of Mempool will be much helpful to
reduce misses on per-queue caches anyway.
'--legacy-mem' is also supported.
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
2018-05-09 11:09:04 +00:00
|
|
|
/* Pre-register Rx mempool. */
|
2018-05-09 11:13:50 +00:00
|
|
|
mp = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
|
|
|
|
rxq_ctrl->rxq.mprq_mp : rxq_ctrl->rxq.mp;
|
net/mlx5: add new memory region support
This is the new design of Memory Region (MR) for mlx PMD, in order to:
- Accommodate the new memory hotplug model.
- Support non-contiguous Mempool.
There are multiple layers for MR search.
L0 is to look up the last-hit entry which is pointed by mr_ctrl->mru (Most
Recently Used). If L0 misses, L1 is to look up the address in a fixed-sized
array by linear search. L0/L1 is in an inline function -
mlx5_mr_lookup_cache().
If L1 misses, the bottom-half function is called to look up the address
from the bigger local cache of the queue. This is L2 - mlx5_mr_addr2mr_bh()
and it is not an inline function. Data structure for L2 is the Binary Tree.
If L2 misses, the search falls into the slowest path which takes locks in
order to access global device cache (priv->mr.cache) which is also a B-tree
and caches the original MR list (priv->mr.mr_list) of the device. Unless
the global cache is overflowed, it is all-inclusive of the MR list. This is
L3 - mlx5_mr_lookup_dev(). The size of the L3 cache table is limited and
can't be expanded on the fly due to deadlock. Refer to the comments in the
code for the details - mr_lookup_dev(). If L3 is overflowed, the list will
have to be searched directly bypassing the cache although it is slower.
If L3 misses, a new MR for the address should be created -
mlx5_mr_create(). When it creates a new MR, it tries to register adjacent
memsegs as much as possible which are virtually contiguous around the
address. This must take two locks - memory_hotplug_lock and
priv->mr.rwlock. Due to memory_hotplug_lock, there can't be any
allocation/free of memory inside.
In the free callback of the memory hotplug event, freed space is searched
from the MR list and corresponding bits are cleared from the bitmap of MRs.
This can fragment a MR and the MR will have multiple search entries in the
caches. Once there's a change by the event, the global cache must be
rebuilt and all the per-queue caches will be flushed as well. If memory is
frequently freed in run-time, that may cause jitter on dataplane processing
in the worst case by incurring MR cache flush and rebuild. But, it would be
the least probable scenario.
To guarantee the most optimal performance, it is highly recommended to use
an EAL option - '--socket-mem'. Then, the reserved memory will be pinned
and won't be freed dynamically. And it is also recommended to configure
per-lcore cache of Mempool. Even though there're many MRs for a device or
MRs are highly fragmented, the cache of Mempool will be much helpful to
reduce misses on per-queue caches anyway.
'--legacy-mem' is also supported.
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
2018-05-09 11:09:04 +00:00
|
|
|
DRV_LOG(DEBUG,
|
|
|
|
"port %u Rx queue %u registering"
|
|
|
|
" mp %s having %u chunks",
|
2019-04-10 18:41:16 +00:00
|
|
|
dev->data->port_id, rxq_ctrl->rxq.idx,
|
net/mlx5: add new memory region support
This is the new design of Memory Region (MR) for mlx PMD, in order to:
- Accommodate the new memory hotplug model.
- Support non-contiguous Mempool.
There are multiple layers for MR search.
L0 is to look up the last-hit entry which is pointed by mr_ctrl->mru (Most
Recently Used). If L0 misses, L1 is to look up the address in a fixed-sized
array by linear search. L0/L1 is in an inline function -
mlx5_mr_lookup_cache().
If L1 misses, the bottom-half function is called to look up the address
from the bigger local cache of the queue. This is L2 - mlx5_mr_addr2mr_bh()
and it is not an inline function. Data structure for L2 is the Binary Tree.
If L2 misses, the search falls into the slowest path which takes locks in
order to access global device cache (priv->mr.cache) which is also a B-tree
and caches the original MR list (priv->mr.mr_list) of the device. Unless
the global cache is overflowed, it is all-inclusive of the MR list. This is
L3 - mlx5_mr_lookup_dev(). The size of the L3 cache table is limited and
can't be expanded on the fly due to deadlock. Refer to the comments in the
code for the details - mr_lookup_dev(). If L3 is overflowed, the list will
have to be searched directly bypassing the cache although it is slower.
If L3 misses, a new MR for the address should be created -
mlx5_mr_create(). When it creates a new MR, it tries to register adjacent
memsegs as much as possible which are virtually contiguous around the
address. This must take two locks - memory_hotplug_lock and
priv->mr.rwlock. Due to memory_hotplug_lock, there can't be any
allocation/free of memory inside.
In the free callback of the memory hotplug event, freed space is searched
from the MR list and corresponding bits are cleared from the bitmap of MRs.
This can fragment a MR and the MR will have multiple search entries in the
caches. Once there's a change by the event, the global cache must be
rebuilt and all the per-queue caches will be flushed as well. If memory is
frequently freed in run-time, that may cause jitter on dataplane processing
in the worst case by incurring MR cache flush and rebuild. But, it would be
the least probable scenario.
To guarantee the most optimal performance, it is highly recommended to use
an EAL option - '--socket-mem'. Then, the reserved memory will be pinned
and won't be freed dynamically. And it is also recommended to configure
per-lcore cache of Mempool. Even though there're many MRs for a device or
MRs are highly fragmented, the cache of Mempool will be much helpful to
reduce misses on per-queue caches anyway.
'--legacy-mem' is also supported.
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
2018-05-09 11:09:04 +00:00
|
|
|
mp->name, mp->nb_mem_chunks);
|
|
|
|
mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl, mp);
|
2017-10-09 14:44:49 +00:00
|
|
|
ret = rxq_alloc_elts(rxq_ctrl);
|
|
|
|
if (ret)
|
|
|
|
goto error;
|
2018-03-05 12:21:04 +00:00
|
|
|
rxq_ctrl->ibv = mlx5_rxq_ibv_new(dev, i);
|
2018-03-05 12:21:06 +00:00
|
|
|
if (!rxq_ctrl->ibv)
|
2017-10-09 14:44:49 +00:00
|
|
|
goto error;
|
2019-05-30 10:20:38 +00:00
|
|
|
rxq_ctrl->wqn = rxq_ctrl->ibv->wq->wq_num;
|
2017-10-09 14:44:49 +00:00
|
|
|
}
|
2018-03-05 12:21:06 +00:00
|
|
|
return 0;
|
2017-10-09 14:44:49 +00:00
|
|
|
error:
|
2018-03-05 12:21:06 +00:00
|
|
|
ret = rte_errno; /* Save rte_errno before cleanup. */
|
2018-07-23 20:57:04 +00:00
|
|
|
do {
|
|
|
|
mlx5_rxq_release(dev, i);
|
|
|
|
} while (i-- != 0);
|
2018-03-05 12:21:06 +00:00
|
|
|
rte_errno = ret; /* Restore rte_errno. */
|
|
|
|
return -rte_errno;
|
2017-10-09 14:44:49 +00:00
|
|
|
}
|
|
|
|
|
2015-10-30 18:52:33 +00:00
|
|
|
/**
|
|
|
|
* DPDK callback to start the device.
|
|
|
|
*
|
|
|
|
* Simulate device start by attaching all configured flows.
|
|
|
|
*
|
|
|
|
* @param dev
|
|
|
|
* Pointer to Ethernet device structure.
|
|
|
|
*
|
|
|
|
* @return
|
2018-03-05 12:21:06 +00:00
|
|
|
* 0 on success, a negative errno value otherwise and rte_errno is set.
|
2015-10-30 18:52:33 +00:00
|
|
|
*/
|
|
|
|
int
|
|
|
|
mlx5_dev_start(struct rte_eth_dev *dev)
|
|
|
|
{
|
2019-02-21 09:29:14 +00:00
|
|
|
struct mlx5_priv *priv = dev->data->dev_private;
|
2018-03-05 12:21:06 +00:00
|
|
|
int ret;
|
2015-10-30 18:52:33 +00:00
|
|
|
|
2018-07-23 20:57:04 +00:00
|
|
|
DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
|
2018-03-05 12:21:06 +00:00
|
|
|
ret = mlx5_txq_start(dev);
|
|
|
|
if (ret) {
|
2018-03-13 09:23:56 +00:00
|
|
|
DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
|
|
|
|
dev->data->port_id, strerror(rte_errno));
|
2018-07-23 20:57:04 +00:00
|
|
|
return -rte_errno;
|
2017-10-09 14:44:48 +00:00
|
|
|
}
|
2018-03-05 12:21:06 +00:00
|
|
|
ret = mlx5_rxq_start(dev);
|
|
|
|
if (ret) {
|
2018-03-13 09:23:56 +00:00
|
|
|
DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
|
|
|
|
dev->data->port_id, strerror(rte_errno));
|
2018-07-23 20:57:04 +00:00
|
|
|
mlx5_txq_stop(dev);
|
|
|
|
return -rte_errno;
|
2017-10-09 14:44:49 +00:00
|
|
|
}
|
2018-07-23 20:57:04 +00:00
|
|
|
dev->data->dev_started = 1;
|
2018-03-05 12:21:06 +00:00
|
|
|
ret = mlx5_rx_intr_vec_enable(dev);
|
|
|
|
if (ret) {
|
2018-03-13 09:23:56 +00:00
|
|
|
DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
|
|
|
|
dev->data->port_id);
|
2017-06-14 11:49:17 +00:00
|
|
|
goto error;
|
2017-03-14 13:03:09 +00:00
|
|
|
}
|
2018-11-23 08:03:37 +00:00
|
|
|
mlx5_stats_init(dev);
|
net/mlx5: fix link status behavior
This behavior is mixed between what should be handled by the application
and what is under PMD responsibility.
According to DPDK API:
- link_update() should only query the link status [1]
- link_set_{up,down}() should only set the link to the according status [1]
- dev_{start,stop}() should enable/disable traffic reception/emission [2]
On this PMD, the link status is retrieved from the net device associated
owned by the Linux Kernel, it does not means that even when this interface
is down, the PMD cannot send/receive traffic from the NIC those two
information are unrelated, until the physical port is active and has a
link, the PMD can receive/send traffic on the wire.
According to DPDK API, calling the rte_eth_dev_start() even when the Linux
interface link is down is then possible and allowed, as the traffic will
flow between the DPDK application and the Physical port.
This also means that a synchronization between the Linux interface and the
DPDK application remains under the DPDK application responsibility.
To handle such synchronization the application should behave as the
following scheme, to start:
rte_eth_get_link(port_id, &link);
if (link.link_status == ETH_DOWN)
rte_eth_dev_set_link_up(port_id);
rte_eth_dev_start(port_id);
Taking in account the possible returned values for each function.
and to stop:
rte_eth_dev_stop(port_id);
rte_eth_dev_set_link_down(port_id);
The application should also set the LSC interrupt callbacks to catch and
behave accordingly when the administrator set the Linux device down/up.
The same callbacks are called when the link on the medium falls/raise.
[1] https://dpdk.org/browse/dpdk/tree/lib/librte_ether/rte_ethdev_core.h
[2] https://dpdk.org/browse/dpdk/tree/lib/librte_ether/rte_ethdev.h#n1677
Fixes: c7bf62255edf ("net/mlx5: fix handling link status event")
Fixes: e313ef4c2fe8 ("net/mlx5: fix link state on device start")
Cc: stable@dpdk.org
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
2018-03-12 13:43:18 +00:00
|
|
|
ret = mlx5_traffic_enable(dev);
|
2018-03-05 12:21:06 +00:00
|
|
|
if (ret) {
|
net/mlx5: fix link status behavior
This behavior is mixed between what should be handled by the application
and what is under PMD responsibility.
According to DPDK API:
- link_update() should only query the link status [1]
- link_set_{up,down}() should only set the link to the according status [1]
- dev_{start,stop}() should enable/disable traffic reception/emission [2]
On this PMD, the link status is retrieved from the net device associated
owned by the Linux Kernel, it does not means that even when this interface
is down, the PMD cannot send/receive traffic from the NIC those two
information are unrelated, until the physical port is active and has a
link, the PMD can receive/send traffic on the wire.
According to DPDK API, calling the rte_eth_dev_start() even when the Linux
interface link is down is then possible and allowed, as the traffic will
flow between the DPDK application and the Physical port.
This also means that a synchronization between the Linux interface and the
DPDK application remains under the DPDK application responsibility.
To handle such synchronization the application should behave as the
following scheme, to start:
rte_eth_get_link(port_id, &link);
if (link.link_status == ETH_DOWN)
rte_eth_dev_set_link_up(port_id);
rte_eth_dev_start(port_id);
Taking in account the possible returned values for each function.
and to stop:
rte_eth_dev_stop(port_id);
rte_eth_dev_set_link_down(port_id);
The application should also set the LSC interrupt callbacks to catch and
behave accordingly when the administrator set the Linux device down/up.
The same callbacks are called when the link on the medium falls/raise.
[1] https://dpdk.org/browse/dpdk/tree/lib/librte_ether/rte_ethdev_core.h
[2] https://dpdk.org/browse/dpdk/tree/lib/librte_ether/rte_ethdev.h#n1677
Fixes: c7bf62255edf ("net/mlx5: fix handling link status event")
Fixes: e313ef4c2fe8 ("net/mlx5: fix link state on device start")
Cc: stable@dpdk.org
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
2018-03-12 13:43:18 +00:00
|
|
|
DRV_LOG(DEBUG, "port %u failed to set defaults flows",
|
2018-03-13 09:23:56 +00:00
|
|
|
dev->data->port_id);
|
2018-01-25 16:04:28 +00:00
|
|
|
goto error;
|
|
|
|
}
|
net/mlx5: fix link status behavior
This behavior is mixed between what should be handled by the application
and what is under PMD responsibility.
According to DPDK API:
- link_update() should only query the link status [1]
- link_set_{up,down}() should only set the link to the according status [1]
- dev_{start,stop}() should enable/disable traffic reception/emission [2]
On this PMD, the link status is retrieved from the net device associated
owned by the Linux Kernel, it does not means that even when this interface
is down, the PMD cannot send/receive traffic from the NIC those two
information are unrelated, until the physical port is active and has a
link, the PMD can receive/send traffic on the wire.
According to DPDK API, calling the rte_eth_dev_start() even when the Linux
interface link is down is then possible and allowed, as the traffic will
flow between the DPDK application and the Physical port.
This also means that a synchronization between the Linux interface and the
DPDK application remains under the DPDK application responsibility.
To handle such synchronization the application should behave as the
following scheme, to start:
rte_eth_get_link(port_id, &link);
if (link.link_status == ETH_DOWN)
rte_eth_dev_set_link_up(port_id);
rte_eth_dev_start(port_id);
Taking in account the possible returned values for each function.
and to stop:
rte_eth_dev_stop(port_id);
rte_eth_dev_set_link_down(port_id);
The application should also set the LSC interrupt callbacks to catch and
behave accordingly when the administrator set the Linux device down/up.
The same callbacks are called when the link on the medium falls/raise.
[1] https://dpdk.org/browse/dpdk/tree/lib/librte_ether/rte_ethdev_core.h
[2] https://dpdk.org/browse/dpdk/tree/lib/librte_ether/rte_ethdev.h#n1677
Fixes: c7bf62255edf ("net/mlx5: fix handling link status event")
Fixes: e313ef4c2fe8 ("net/mlx5: fix link state on device start")
Cc: stable@dpdk.org
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
2018-03-12 13:43:18 +00:00
|
|
|
ret = mlx5_flow_start(dev, &priv->flows);
|
|
|
|
if (ret) {
|
|
|
|
DRV_LOG(DEBUG, "port %u failed to set flows",
|
|
|
|
dev->data->port_id);
|
|
|
|
goto error;
|
|
|
|
}
|
2019-04-01 21:12:56 +00:00
|
|
|
rte_wmb();
|
net/mlx5: fix link status behavior
This behavior is mixed between what should be handled by the application
and what is under PMD responsibility.
According to DPDK API:
- link_update() should only query the link status [1]
- link_set_{up,down}() should only set the link to the according status [1]
- dev_{start,stop}() should enable/disable traffic reception/emission [2]
On this PMD, the link status is retrieved from the net device associated
owned by the Linux Kernel, it does not means that even when this interface
is down, the PMD cannot send/receive traffic from the NIC those two
information are unrelated, until the physical port is active and has a
link, the PMD can receive/send traffic on the wire.
According to DPDK API, calling the rte_eth_dev_start() even when the Linux
interface link is down is then possible and allowed, as the traffic will
flow between the DPDK application and the Physical port.
This also means that a synchronization between the Linux interface and the
DPDK application remains under the DPDK application responsibility.
To handle such synchronization the application should behave as the
following scheme, to start:
rte_eth_get_link(port_id, &link);
if (link.link_status == ETH_DOWN)
rte_eth_dev_set_link_up(port_id);
rte_eth_dev_start(port_id);
Taking in account the possible returned values for each function.
and to stop:
rte_eth_dev_stop(port_id);
rte_eth_dev_set_link_down(port_id);
The application should also set the LSC interrupt callbacks to catch and
behave accordingly when the administrator set the Linux device down/up.
The same callbacks are called when the link on the medium falls/raise.
[1] https://dpdk.org/browse/dpdk/tree/lib/librte_ether/rte_ethdev_core.h
[2] https://dpdk.org/browse/dpdk/tree/lib/librte_ether/rte_ethdev.h#n1677
Fixes: c7bf62255edf ("net/mlx5: fix handling link status event")
Fixes: e313ef4c2fe8 ("net/mlx5: fix link state on device start")
Cc: stable@dpdk.org
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
2018-03-12 13:43:18 +00:00
|
|
|
dev->tx_pkt_burst = mlx5_select_tx_function(dev);
|
|
|
|
dev->rx_pkt_burst = mlx5_select_rx_function(dev);
|
2019-04-01 21:12:56 +00:00
|
|
|
/* Enable datapath on secondary process. */
|
|
|
|
mlx5_mp_req_start_rxtx(dev);
|
2018-03-05 12:21:04 +00:00
|
|
|
mlx5_dev_interrupt_handler_install(dev);
|
2017-02-22 09:57:52 +00:00
|
|
|
return 0;
|
|
|
|
error:
|
2018-03-05 12:21:06 +00:00
|
|
|
ret = rte_errno; /* Save rte_errno before cleanup. */
|
2017-02-22 09:57:52 +00:00
|
|
|
/* Rollback. */
|
2017-10-09 14:44:55 +00:00
|
|
|
dev->data->dev_started = 0;
|
2018-03-05 12:21:04 +00:00
|
|
|
mlx5_flow_stop(dev, &priv->flows);
|
|
|
|
mlx5_traffic_disable(dev);
|
|
|
|
mlx5_txq_stop(dev);
|
|
|
|
mlx5_rxq_stop(dev);
|
2018-03-05 12:21:06 +00:00
|
|
|
rte_errno = ret; /* Restore rte_errno. */
|
|
|
|
return -rte_errno;
|
2015-10-30 18:52:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* DPDK callback to stop the device.
|
|
|
|
*
|
|
|
|
* Simulate device stop by detaching all configured flows.
|
|
|
|
*
|
|
|
|
* @param dev
|
|
|
|
* Pointer to Ethernet device structure.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
mlx5_dev_stop(struct rte_eth_dev *dev)
|
|
|
|
{
|
2019-02-21 09:29:14 +00:00
|
|
|
struct mlx5_priv *priv = dev->data->dev_private;
|
2015-10-30 18:52:33 +00:00
|
|
|
|
2017-10-09 14:44:43 +00:00
|
|
|
dev->data->dev_started = 0;
|
|
|
|
/* Prevent crashes when queues are still in use. */
|
|
|
|
dev->rx_pkt_burst = removed_rx_burst;
|
|
|
|
dev->tx_pkt_burst = removed_tx_burst;
|
|
|
|
rte_wmb();
|
2019-04-01 21:12:56 +00:00
|
|
|
/* Disable datapath on secondary process. */
|
|
|
|
mlx5_mp_req_stop_rxtx(dev);
|
2017-10-09 14:44:43 +00:00
|
|
|
usleep(1000 * priv->rxqs_n);
|
2018-07-23 20:57:04 +00:00
|
|
|
DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
|
2018-03-05 12:21:04 +00:00
|
|
|
mlx5_flow_stop(dev, &priv->flows);
|
|
|
|
mlx5_traffic_disable(dev);
|
|
|
|
mlx5_rx_intr_vec_disable(dev);
|
|
|
|
mlx5_dev_interrupt_handler_uninstall(dev);
|
|
|
|
mlx5_txq_stop(dev);
|
|
|
|
mlx5_rxq_stop(dev);
|
2017-10-09 14:44:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Enable traffic flows configured by control plane
|
|
|
|
*
|
2018-03-05 12:21:04 +00:00
|
|
|
* @param dev
* Pointer to Ethernet device structure.
|
|
|
|
*
|
|
|
|
* @return
|
2018-03-05 12:21:06 +00:00
|
|
|
* 0 on success, a negative errno value otherwise and rte_errno is set.
|
2017-10-09 14:44:55 +00:00
|
|
|
*/
|
|
|
|
int
|
2018-03-05 12:21:04 +00:00
|
|
|
mlx5_traffic_enable(struct rte_eth_dev *dev)
|
2017-10-09 14:44:55 +00:00
|
|
|
{
|
2019-02-21 09:29:14 +00:00
|
|
|
struct mlx5_priv *priv = dev->data->dev_private;
|
2017-10-24 15:18:17 +00:00
|
|
|
struct rte_flow_item_eth bcast = {
|
|
|
|
.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
|
|
|
|
};
|
|
|
|
struct rte_flow_item_eth ipv6_multi_spec = {
|
|
|
|
.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
|
|
|
|
};
|
|
|
|
struct rte_flow_item_eth ipv6_multi_mask = {
|
|
|
|
.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
|
|
|
|
};
|
|
|
|
struct rte_flow_item_eth unicast = {
|
|
|
|
.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
|
|
|
|
};
|
|
|
|
struct rte_flow_item_eth unicast_mask = {
|
|
|
|
.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
|
|
|
|
};
|
|
|
|
const unsigned int vlan_filter_n = priv->vlan_filter_n;
|
2019-05-21 16:13:03 +00:00
|
|
|
const struct rte_ether_addr cmp = {
|
2017-10-24 15:18:17 +00:00
|
|
|
.addr_bytes = "\x00\x00\x00\x00\x00\x00",
|
|
|
|
};
|
|
|
|
unsigned int i;
|
|
|
|
unsigned int j;
|
|
|
|
int ret;
|
|
|
|
|
2017-10-09 14:44:55 +00:00
|
|
|
if (priv->isolated)
|
|
|
|
return 0;
|
|
|
|
if (dev->data->promiscuous) {
|
|
|
|
struct rte_flow_item_eth promisc = {
|
|
|
|
.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
|
|
|
|
.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
|
|
|
|
.type = 0,
|
|
|
|
};
|
|
|
|
|
2018-03-05 12:21:06 +00:00
|
|
|
ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
|
|
|
|
if (ret)
|
|
|
|
goto error;
|
2017-10-24 15:18:17 +00:00
|
|
|
}
|
|
|
|
if (dev->data->all_multicast) {
|
2017-10-09 14:44:55 +00:00
|
|
|
struct rte_flow_item_eth multicast = {
|
|
|
|
.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
|
2017-10-24 15:18:17 +00:00
|
|
|
.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
|
2017-10-09 14:44:55 +00:00
|
|
|
.type = 0,
|
|
|
|
};
|
|
|
|
|
2018-03-05 12:21:06 +00:00
|
|
|
ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
|
|
|
|
if (ret)
|
|
|
|
goto error;
|
2017-10-09 14:44:55 +00:00
|
|
|
} else {
|
2017-10-24 15:18:17 +00:00
|
|
|
/* Add broadcast/multicast flows. */
|
|
|
|
for (i = 0; i != vlan_filter_n; ++i) {
|
|
|
|
uint16_t vlan = priv->vlan_filter[i];
|
|
|
|
|
|
|
|
struct rte_flow_item_vlan vlan_spec = {
|
|
|
|
.tci = rte_cpu_to_be_16(vlan),
|
|
|
|
};
|
2018-07-23 07:18:40 +00:00
|
|
|
struct rte_flow_item_vlan vlan_mask =
|
|
|
|
rte_flow_item_vlan_mask;
|
2017-10-09 14:44:55 +00:00
|
|
|
|
2017-10-24 15:18:17 +00:00
|
|
|
ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
|
|
|
|
&vlan_spec, &vlan_mask);
|
|
|
|
if (ret)
|
|
|
|
goto error;
|
|
|
|
ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
|
|
|
|
&ipv6_multi_mask,
|
|
|
|
&vlan_spec, &vlan_mask);
|
|
|
|
if (ret)
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
if (!vlan_filter_n) {
|
|
|
|
ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
|
|
|
|
if (ret)
|
|
|
|
goto error;
|
|
|
|
ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
|
|
|
|
&ipv6_multi_mask);
|
|
|
|
if (ret)
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* Add MAC address flows. */
|
|
|
|
for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
|
2019-05-21 16:13:03 +00:00
|
|
|
struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
|
2017-10-09 14:44:55 +00:00
|
|
|
|
2017-10-24 15:18:17 +00:00
|
|
|
if (!memcmp(mac, &cmp, sizeof(*mac)))
|
|
|
|
continue;
|
|
|
|
memcpy(&unicast.dst.addr_bytes,
|
|
|
|
mac->addr_bytes,
|
2019-05-21 16:13:05 +00:00
|
|
|
RTE_ETHER_ADDR_LEN);
|
2017-10-24 15:18:17 +00:00
|
|
|
for (j = 0; j != vlan_filter_n; ++j) {
|
|
|
|
uint16_t vlan = priv->vlan_filter[j];
|
2017-10-09 14:44:55 +00:00
|
|
|
|
2017-10-24 15:18:17 +00:00
|
|
|
struct rte_flow_item_vlan vlan_spec = {
|
|
|
|
.tci = rte_cpu_to_be_16(vlan),
|
|
|
|
};
|
2018-07-23 07:18:40 +00:00
|
|
|
struct rte_flow_item_vlan vlan_mask =
|
|
|
|
rte_flow_item_vlan_mask;
|
2017-10-09 14:44:55 +00:00
|
|
|
|
2017-10-24 15:18:17 +00:00
|
|
|
ret = mlx5_ctrl_flow_vlan(dev, &unicast,
|
|
|
|
&unicast_mask,
|
|
|
|
&vlan_spec,
|
|
|
|
&vlan_mask);
|
|
|
|
if (ret)
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
if (!vlan_filter_n) {
|
2018-03-05 12:21:06 +00:00
|
|
|
ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
|
2017-10-24 15:18:17 +00:00
|
|
|
if (ret)
|
|
|
|
goto error;
|
2017-10-09 14:44:55 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
error:
|
2018-03-05 12:21:06 +00:00
|
|
|
ret = rte_errno; /* Save rte_errno before cleanup. */
|
|
|
|
mlx5_flow_list_flush(dev, &priv->ctrl_flows);
|
|
|
|
rte_errno = ret; /* Restore rte_errno. */
|
|
|
|
return -rte_errno;
|
2017-10-09 14:44:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Disable traffic flows configured by control plane
|
|
|
|
*
|
|
|
|
* @param dev
|
|
|
|
* Pointer to Ethernet device private data.
|
|
|
|
*/
|
2018-03-05 12:21:05 +00:00
|
|
|
void
|
2018-03-05 12:21:04 +00:00
|
|
|
mlx5_traffic_disable(struct rte_eth_dev *dev)
|
2017-10-09 14:44:55 +00:00
|
|
|
{
|
2019-02-21 09:29:14 +00:00
|
|
|
struct mlx5_priv *priv = dev->data->dev_private;
|
2018-03-05 12:21:04 +00:00
|
|
|
|
|
|
|
mlx5_flow_list_flush(dev, &priv->ctrl_flows);
|
2017-10-09 14:44:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Restart traffic flows configured by control plane
|
|
|
|
*
|
|
|
|
* @param dev
|
2018-03-05 12:21:04 +00:00
|
|
|
* Pointer to Ethernet device private data.
|
2017-10-09 14:44:55 +00:00
|
|
|
*
|
|
|
|
* @return
|
2018-03-05 12:21:06 +00:00
|
|
|
* 0 on success, a negative errno value otherwise and rte_errno is set.
|
2017-10-09 14:44:55 +00:00
|
|
|
*/
|
|
|
|
int
|
|
|
|
mlx5_traffic_restart(struct rte_eth_dev *dev)
|
|
|
|
{
|
2018-03-05 12:21:04 +00:00
|
|
|
if (dev->data->dev_started) {
|
|
|
|
mlx5_traffic_disable(dev);
|
2018-03-05 12:21:06 +00:00
|
|
|
return mlx5_traffic_enable(dev);
|
2018-03-05 12:21:04 +00:00
|
|
|
}
|
2017-10-09 14:44:55 +00:00
|
|
|
return 0;
|
2015-10-30 18:52:33 +00:00
|
|
|
}
|