numam-dpdk/drivers/net/mlx5/mlx5_trigger.c

408 lines
9.8 KiB
C
Raw Normal View History

/*-
* BSD LICENSE
*
* Copyright 2015 6WIND S.A.
* Copyright 2015 Mellanox.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of 6WIND S.A. nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <unistd.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
static void
priv_txq_stop(struct priv *priv)
{
unsigned int i;
for (i = 0; i != priv->txqs_n; ++i)
mlx5_priv_txq_release(priv, i);
}
static int
priv_txq_start(struct priv *priv)
{
unsigned int i;
int ret = 0;
/* Add memory regions to Tx queues. */
for (i = 0; i != priv->txqs_n; ++i) {
unsigned int idx = 0;
struct mlx5_mr *mr;
struct mlx5_txq_ctrl *txq_ctrl = mlx5_priv_txq_get(priv, i);
if (!txq_ctrl)
continue;
LIST_FOREACH(mr, &priv->mr, next) {
priv_txq_mp2mr_reg(priv, &txq_ctrl->txq, mr->mp, idx++);
if (idx == MLX5_PMD_TX_MP_CACHE)
break;
}
txq_alloc_elts(txq_ctrl);
txq_ctrl->ibv = mlx5_priv_txq_ibv_new(priv, i);
if (!txq_ctrl->ibv) {
ret = ENOMEM;
goto error;
}
}
return -ret;
error:
priv_txq_stop(priv);
return -ret;
}
static void
priv_rxq_stop(struct priv *priv)
{
unsigned int i;
for (i = 0; i != priv->rxqs_n; ++i)
mlx5_priv_rxq_release(priv, i);
}
static int
priv_rxq_start(struct priv *priv)
{
unsigned int i;
int ret = 0;
for (i = 0; i != priv->rxqs_n; ++i) {
struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_priv_rxq_get(priv, i);
if (!rxq_ctrl)
continue;
ret = rxq_alloc_elts(rxq_ctrl);
if (ret)
goto error;
rxq_ctrl->ibv = mlx5_priv_rxq_ibv_new(priv, i);
if (!rxq_ctrl->ibv) {
ret = ENOMEM;
goto error;
}
}
return -ret;
error:
priv_rxq_stop(priv);
return -ret;
}
/**
* DPDK callback to start the device.
*
* Simulate device start by attaching all configured flows.
*
* @param dev
* Pointer to Ethernet device structure.
*
* @return
* 0 on success, negative errno value on failure.
*/
int
mlx5_dev_start(struct rte_eth_dev *dev)
{
struct priv *priv = dev->data->dev_private;
struct mlx5_mr *mr = NULL;
mlx5: refactor Rx code for the new verbs RSS API The new Verbs RSS API is lower-level than the previous one and much more flexible but requires RX queues to use Work Queues (WQs) internally instead of Queue Pairs (QPs), which are grouped in an indirection table used by a new kind of hash RX QPs. Hash RX QPs and the indirection table together replace the parent RSS QP while WQs are mostly similar to child QPs. RSS hash key is not configurable yet. Summary of changes: - Individual DPDK RX queues do not store flow properties anymore, this info is now part of the hash RX queues. - All functions affecting the parent queue when RSS is enabled or the basic queues otherwise are modified to affect hash RX queues instead. - Hash RX queues are also used when a single DPDK RX queue is configured (no RSS) to remove that special case. - Hash RX queues and indirection table are created/destroyed when device is started/stopped in addition to create/destroy flows. - Contrary to QPs, WQs are moved to the "ready" state before posting RX buffers, otherwise they are ignored. - Resource domain information is added to WQs for better performance. - CQs are not resized anymore when switching between non-SG and SG modes as it does not work correctly with WQs. Use the largest possible size instead, since CQ size does not have to be the same as the number of elements in the RX queue. This also applies to the maximum number of outstanding WRs in a WQ (max_recv_wr). Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com> Signed-off-by: Olga Shern <olgas@mellanox.com> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> Signed-off-by: Or Ami <ora@mellanox.com> Signed-off-by: Yaacov Hazan <yaacovh@mellanox.com>
2015-10-30 18:55:06 +00:00
int err;
dev->data->dev_started = 1;
priv_lock(priv);
err = priv_flow_create_drop_queue(priv);
if (err) {
ERROR("%p: Drop queue allocation failed: %s",
(void *)dev, strerror(err));
goto error;
}
mlx5: refactor Rx code for the new verbs RSS API The new Verbs RSS API is lower-level than the previous one and much more flexible but requires RX queues to use Work Queues (WQs) internally instead of Queue Pairs (QPs), which are grouped in an indirection table used by a new kind of hash RX QPs. Hash RX QPs and the indirection table together replace the parent RSS QP while WQs are mostly similar to child QPs. RSS hash key is not configurable yet. Summary of changes: - Individual DPDK RX queues do not store flow properties anymore, this info is now part of the hash RX queues. - All functions affecting the parent queue when RSS is enabled or the basic queues otherwise are modified to affect hash RX queues instead. - Hash RX queues are also used when a single DPDK RX queue is configured (no RSS) to remove that special case. - Hash RX queues and indirection table are created/destroyed when device is started/stopped in addition to create/destroy flows. - Contrary to QPs, WQs are moved to the "ready" state before posting RX buffers, otherwise they are ignored. - Resource domain information is added to WQs for better performance. - CQs are not resized anymore when switching between non-SG and SG modes as it does not work correctly with WQs. Use the largest possible size instead, since CQ size does not have to be the same as the number of elements in the RX queue. This also applies to the maximum number of outstanding WRs in a WQ (max_recv_wr). Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com> Signed-off-by: Olga Shern <olgas@mellanox.com> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> Signed-off-by: Or Ami <ora@mellanox.com> Signed-off-by: Yaacov Hazan <yaacovh@mellanox.com>
2015-10-30 18:55:06 +00:00
DEBUG("%p: allocating and configuring hash RX queues", (void *)dev);
rte_mempool_walk(mlx5_mp2mr_iter, priv);
err = priv_txq_start(priv);
if (err) {
ERROR("%p: TXQ allocation failed: %s",
(void *)dev, strerror(err));
goto error;
}
err = priv_rxq_start(priv);
if (err) {
ERROR("%p: RXQ allocation failed: %s",
(void *)dev, strerror(err));
goto error;
}
net/mlx5: fix Rx interrupts management This commit addresses various issues that may lead to undefined behavior when configuring Rx interrupts. While failure to create a Rx queue completion channel in rxq_ctrl_setup() prevents that queue from being created, existing queues still have theirs. Since the error handler disables dev_conf.intr_conf.rxq as well, subsequent calls to rxq_ctrl_setup() create Rx queues without interrupts. This leads to a scenario where not all Rx queues support interrupts; missing checks on the presence of completion channels may crash the application. Considering that the PMD is not supposed to disable user-provided configuration parameters (dev_conf.intr_conf.rxq), and that these can change for subsequent rxq_ctrl_setup() calls anyway, properly supporting a mixed mode where not all Rx queues have interrupts enabled is a better approach. To do so with a minimum set of changes, priv_intr_efd_enable() and priv_create_intr_vec() are first refactored as a single priv_rx_intr_vec_enable() function (same for their "disable" counterparts). Since they had to be used together, there was no point in keeping them separate. Remaining changes: - Always clean up before reconfiguring interrupts to avoid memory leaks. - Always clean up when closing the device. - Use malloc()/free() instead of their rte_*() counterparts since there is no need to store the vector in huge pages-backed memory. - Allow more Rx queues than the size of the event file descriptor array as long as Rx interrupts are not requested on all of them. - Properly clean up interrupt handle when disabling Rx interrupts (nb_efd and intr_vec reset to 0). - Check completion channel presence while toggling Rx interrupts on a given queue. Fixes: 3c7d44af252a ("net/mlx5: support user space Rx interrupt event") Cc: stable@dpdk.org Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
2017-06-14 11:49:17 +00:00
err = priv_rx_intr_vec_enable(priv);
if (err) {
ERROR("%p: RX interrupt vector creation failed",
(void *)priv);
goto error;
}
priv_xstats_init(priv);
/* Update link status and Tx/Rx callbacks for the first time. */
memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link));
priv_link_update(priv, 1);
priv_dev_interrupt_handler_install(priv, dev);
priv_unlock(priv);
return 0;
error:
/* Rollback. */
dev->data->dev_started = 0;
for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))
priv_mr_release(priv, mr);
priv_flow_stop(priv, &priv->flows);
priv_dev_traffic_disable(priv, dev);
priv_txq_stop(priv);
priv_rxq_stop(priv);
priv_flow_delete_drop_queue(priv);
priv_unlock(priv);
return err;
}
/**
* DPDK callback to stop the device.
*
* Simulate device stop by detaching all configured flows.
*
* @param dev
* Pointer to Ethernet device structure.
*/
void
mlx5_dev_stop(struct rte_eth_dev *dev)
{
struct priv *priv = dev->data->dev_private;
struct mlx5_mr *mr;
priv_lock(priv);
dev->data->dev_started = 0;
/* Prevent crashes when queues are still in use. */
dev->rx_pkt_burst = removed_rx_burst;
dev->tx_pkt_burst = removed_tx_burst;
rte_wmb();
usleep(1000 * priv->rxqs_n);
mlx5: refactor Rx code for the new verbs RSS API The new Verbs RSS API is lower-level than the previous one and much more flexible but requires RX queues to use Work Queues (WQs) internally instead of Queue Pairs (QPs), which are grouped in an indirection table used by a new kind of hash RX QPs. Hash RX QPs and the indirection table together replace the parent RSS QP while WQs are mostly similar to child QPs. RSS hash key is not configurable yet. Summary of changes: - Individual DPDK RX queues do not store flow properties anymore, this info is now part of the hash RX queues. - All functions affecting the parent queue when RSS is enabled or the basic queues otherwise are modified to affect hash RX queues instead. - Hash RX queues are also used when a single DPDK RX queue is configured (no RSS) to remove that special case. - Hash RX queues and indirection table are created/destroyed when device is started/stopped in addition to create/destroy flows. - Contrary to QPs, WQs are moved to the "ready" state before posting RX buffers, otherwise they are ignored. - Resource domain information is added to WQs for better performance. - CQs are not resized anymore when switching between non-SG and SG modes as it does not work correctly with WQs. Use the largest possible size instead, since CQ size does not have to be the same as the number of elements in the RX queue. This also applies to the maximum number of outstanding WRs in a WQ (max_recv_wr). Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com> Signed-off-by: Olga Shern <olgas@mellanox.com> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> Signed-off-by: Or Ami <ora@mellanox.com> Signed-off-by: Yaacov Hazan <yaacovh@mellanox.com>
2015-10-30 18:55:06 +00:00
DEBUG("%p: cleaning up and destroying hash RX queues", (void *)dev);
priv_flow_stop(priv, &priv->flows);
priv_dev_traffic_disable(priv, dev);
net/mlx5: fix Rx interrupts management This commit addresses various issues that may lead to undefined behavior when configuring Rx interrupts. While failure to create a Rx queue completion channel in rxq_ctrl_setup() prevents that queue from being created, existing queues still have theirs. Since the error handler disables dev_conf.intr_conf.rxq as well, subsequent calls to rxq_ctrl_setup() create Rx queues without interrupts. This leads to a scenario where not all Rx queues support interrupts; missing checks on the presence of completion channels may crash the application. Considering that the PMD is not supposed to disable user-provided configuration parameters (dev_conf.intr_conf.rxq), and that these can change for subsequent rxq_ctrl_setup() calls anyway, properly supporting a mixed mode where not all Rx queues have interrupts enabled is a better approach. To do so with a minimum set of changes, priv_intr_efd_enable() and priv_create_intr_vec() are first refactored as a single priv_rx_intr_vec_enable() function (same for their "disable" counterparts). Since they had to be used together, there was no point in keeping them separate. Remaining changes: - Always clean up before reconfiguring interrupts to avoid memory leaks. - Always clean up when closing the device. - Use malloc()/free() instead of their rte_*() counterparts since there is no need to store the vector in huge pages-backed memory. - Allow more Rx queues than the size of the event file descriptor array as long as Rx interrupts are not requested on all of them. - Properly clean up interrupt handle when disabling Rx interrupts (nb_efd and intr_vec reset to 0). - Check completion channel presence while toggling Rx interrupts on a given queue. Fixes: 3c7d44af252a ("net/mlx5: support user space Rx interrupt event") Cc: stable@dpdk.org Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
2017-06-14 11:49:17 +00:00
priv_rx_intr_vec_disable(priv);
priv_dev_interrupt_handler_uninstall(priv, dev);
priv_txq_stop(priv);
priv_rxq_stop(priv);
for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))
priv_mr_release(priv, mr);
priv_flow_delete_drop_queue(priv);
priv_unlock(priv);
}
/**
* Enable traffic flows configured by control plane
*
* @param priv
* Pointer to Ethernet device private data.
* @param dev
* Pointer to Ethernet device structure.
*
* @return
* 0 on success.
*/
int
priv_dev_traffic_enable(struct priv *priv, struct rte_eth_dev *dev)
{
struct rte_flow_item_eth bcast = {
.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
};
struct rte_flow_item_eth ipv6_multi_spec = {
.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
};
struct rte_flow_item_eth ipv6_multi_mask = {
.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
};
struct rte_flow_item_eth unicast = {
.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
};
struct rte_flow_item_eth unicast_mask = {
.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
};
const unsigned int vlan_filter_n = priv->vlan_filter_n;
const struct ether_addr cmp = {
.addr_bytes = "\x00\x00\x00\x00\x00\x00",
};
unsigned int i;
unsigned int j;
int ret;
if (priv->isolated)
return 0;
if (dev->data->promiscuous) {
struct rte_flow_item_eth promisc = {
.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
.type = 0,
};
claim_zero(mlx5_ctrl_flow(dev, &promisc, &promisc));
return 0;
}
if (dev->data->all_multicast) {
struct rte_flow_item_eth multicast = {
.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
.type = 0,
};
claim_zero(mlx5_ctrl_flow(dev, &multicast, &multicast));
} else {
/* Add broadcast/multicast flows. */
for (i = 0; i != vlan_filter_n; ++i) {
uint16_t vlan = priv->vlan_filter[i];
struct rte_flow_item_vlan vlan_spec = {
.tci = rte_cpu_to_be_16(vlan),
};
struct rte_flow_item_vlan vlan_mask = {
.tci = 0xffff,
};
ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
&vlan_spec, &vlan_mask);
if (ret)
goto error;
ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
&ipv6_multi_mask,
&vlan_spec, &vlan_mask);
if (ret)
goto error;
}
if (!vlan_filter_n) {
ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
if (ret)
goto error;
ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
&ipv6_multi_mask);
if (ret)
goto error;
}
}
/* Add MAC address flows. */
for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
struct ether_addr *mac = &dev->data->mac_addrs[i];
if (!memcmp(mac, &cmp, sizeof(*mac)))
continue;
memcpy(&unicast.dst.addr_bytes,
mac->addr_bytes,
ETHER_ADDR_LEN);
for (j = 0; j != vlan_filter_n; ++j) {
uint16_t vlan = priv->vlan_filter[j];
struct rte_flow_item_vlan vlan_spec = {
.tci = rte_cpu_to_be_16(vlan),
};
struct rte_flow_item_vlan vlan_mask = {
.tci = 0xffff,
};
ret = mlx5_ctrl_flow_vlan(dev, &unicast,
&unicast_mask,
&vlan_spec,
&vlan_mask);
if (ret)
goto error;
}
if (!vlan_filter_n) {
ret = mlx5_ctrl_flow(dev, &unicast,
&unicast_mask);
if (ret)
goto error;
}
}
return 0;
error:
return rte_errno;
}
/**
* Disable traffic flows configured by control plane
*
* @param priv
* Pointer to Ethernet device private data.
* @param dev
* Pointer to Ethernet device structure.
*
* @return
* 0 on success.
*/
int
priv_dev_traffic_disable(struct priv *priv, struct rte_eth_dev *dev)
{
(void)dev;
priv_flow_flush(priv, &priv->ctrl_flows);
return 0;
}
/**
* Restart traffic flows configured by control plane
*
* @param priv
* Pointer to Ethernet device private data.
* @param dev
* Pointer to Ethernet device structure.
*
* @return
* 0 on success.
*/
int
priv_dev_traffic_restart(struct priv *priv, struct rte_eth_dev *dev)
{
if (dev->data->dev_started) {
priv_dev_traffic_disable(priv, dev);
priv_dev_traffic_enable(priv, dev);
}
return 0;
}
/**
* Restart traffic flows configured by control plane
*
* @param dev
* Pointer to Ethernet device structure.
*
* @return
* 0 on success.
*/
int
mlx5_traffic_restart(struct rte_eth_dev *dev)
{
struct priv *priv = dev->data->dev_private;
priv_lock(priv);
priv_dev_traffic_restart(priv, dev);
priv_unlock(priv);
return 0;
}