numam-dpdk/drivers/net/failsafe/failsafe_ether.c
Gaetan Rivet 94a6f2def2 net/failsafe: avoid crash on malformed ethdev
Some PMDs do not respect the eth_dev API when allocating their
rte_eth_dev. As a result, on the device add event triggered by the
rte_eth_dev_probing_finish() call, the eth_dev being processed is
incomplete.

The segfault is a good way to focus the developer on the issue, but does
not inspire confidence. Instead, warn the user of the error repeatedly.

The failsafe PMD can warn of the issue and continue. It will repeatedly
attempt to initialize the failed port and complain about it, which
should result in the same developer focus but with less crashing.

Signed-off-by: Gaetan Rivet <grive@u256.net>
2020-05-11 22:27:39 +02:00

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2017 6WIND S.A.
* Copyright 2017 Mellanox Technologies, Ltd
*/
#include <unistd.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_cycles.h>
#include "failsafe_private.h"
/** Print a message out of a flow error. */
static int
fs_flow_complain(struct rte_flow_error *error)
{
static const char *const errstrlist[] = {
[RTE_FLOW_ERROR_TYPE_NONE] = "no error",
[RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified",
[RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)",
[RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field",
[RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field",
[RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field",
[RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field",
[RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure",
[RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length",
[RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item",
[RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions",
[RTE_FLOW_ERROR_TYPE_ACTION] = "specific action",
};
const char *errstr;
char buf[32];
int err = rte_errno;
if ((unsigned int)error->type >= RTE_DIM(errstrlist) ||
!errstrlist[error->type])
errstr = "unknown type";
else
errstr = errstrlist[error->type];
ERROR("Caught error type %d (%s): %s%s\n",
error->type, errstr,
error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ",
error->cause), buf) : "",
error->message ? error->message : "(no stated reason)");
return -err;
}
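/* Apply the failsafe port flow isolation mode to a sub-device. */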
static int
eth_dev_flow_isolate_set(struct rte_eth_dev *dev,
struct sub_device *sdev)
{
struct rte_flow_error ferror;
int ret;
if (!PRIV(dev)->flow_isolated) {
DEBUG("Flow isolation already disabled");
} else {
DEBUG("Enabling flow isolation");
ret = rte_flow_isolate(PORT_ID(sdev),
PRIV(dev)->flow_isolated,
&ferror);
if (ret) {
fs_flow_complain(&ferror);
return ret;
}
}
return 0;
}
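/*
* Replay the failsafe port configuration on a sub-device:
* Rx/Tx queues, link status, promiscuous and allmulticast modes,
* MTU, MAC addresses, VLAN filter and flow rules.
*/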
static int
fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
struct sub_device *sdev)
{
struct rte_eth_dev *edev;
struct rte_vlan_filter_conf *vfc1;
struct rte_vlan_filter_conf *vfc2;
struct rte_flow *flow;
struct rte_flow_error ferror;
uint32_t i;
int ret;
edev = ETH(sdev);
/* RX queue setup */
for (i = 0; i < dev->data->nb_rx_queues; i++) {
struct rxq *rxq;
rxq = dev->data->rx_queues[i];
ret = rte_eth_rx_queue_setup(PORT_ID(sdev), i,
rxq->info.nb_desc, rxq->socket_id,
&rxq->info.conf, rxq->info.mp);
if (ret) {
ERROR("rx_queue_setup failed");
return ret;
}
}
/* TX queue setup */
for (i = 0; i < dev->data->nb_tx_queues; i++) {
struct txq *txq;
txq = dev->data->tx_queues[i];
ret = rte_eth_tx_queue_setup(PORT_ID(sdev), i,
txq->info.nb_desc, txq->socket_id,
&txq->info.conf);
if (ret) {
ERROR("tx_queue_setup failed");
return ret;
}
}
/* dev_link.link_status */
if (dev->data->dev_link.link_status !=
edev->data->dev_link.link_status) {
DEBUG("Configuring link_status");
if (dev->data->dev_link.link_status)
ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
else
ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
if (ret) {
ERROR("Failed to apply link_status");
return ret;
}
} else {
DEBUG("link_status already set");
}
/* promiscuous */
if (dev->data->promiscuous != edev->data->promiscuous) {
DEBUG("Configuring promiscuous");
if (dev->data->promiscuous)
ret = rte_eth_promiscuous_enable(PORT_ID(sdev));
else
ret = rte_eth_promiscuous_disable(PORT_ID(sdev));
if (ret != 0) {
ERROR("Failed to apply promiscuous mode");
return ret;
}
} else {
DEBUG("promiscuous already set");
}
/* all_multicast */
if (dev->data->all_multicast != edev->data->all_multicast) {
DEBUG("Configuring all_multicast");
if (dev->data->all_multicast)
ret = rte_eth_allmulticast_enable(PORT_ID(sdev));
else
ret = rte_eth_allmulticast_disable(PORT_ID(sdev));
if (ret != 0) {
ERROR("Failed to apply allmulticast mode");
return ret;
}
} else {
DEBUG("all_multicast already set");
}
/* MTU */
if (dev->data->mtu != edev->data->mtu) {
DEBUG("Configuring MTU");
ret = rte_eth_dev_set_mtu(PORT_ID(sdev), dev->data->mtu);
if (ret) {
ERROR("Failed to apply MTU");
return ret;
}
} else {
DEBUG("MTU already set");
}
/* default MAC */
DEBUG("Configuring default MAC address");
ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
&dev->data->mac_addrs[0]);
if (ret) {
ERROR("Setting default MAC address failed");
return ret;
}
/* additional MAC */
if (PRIV(dev)->nb_mac_addr > 1)
DEBUG("Configure additional MAC address%s",
(PRIV(dev)->nb_mac_addr > 2 ? "es" : ""));
for (i = 1; i < PRIV(dev)->nb_mac_addr; i++) {
struct rte_ether_addr *ea;
ea = &dev->data->mac_addrs[i];
ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), ea,
PRIV(dev)->mac_addr_pool[i]);
if (ret) {
char ea_fmt[RTE_ETHER_ADDR_FMT_SIZE];
rte_ether_format_addr(ea_fmt,
RTE_ETHER_ADDR_FMT_SIZE, ea);
ERROR("Adding MAC address %s failed", ea_fmt);
return ret;
}
}
/*
* Propagate multicast MAC addresses to the sub-device
* if a non-zero number of addresses is set.
* The check avoids breaking failsafe for sub-devices which
* do not support the operation when the feature is not
* actually used.
*/
if (PRIV(dev)->nb_mcast_addr > 0) {
DEBUG("Configuring multicast MAC addresses");
ret = rte_eth_dev_set_mc_addr_list(PORT_ID(sdev),
PRIV(dev)->mcast_addrs,
PRIV(dev)->nb_mcast_addr);
if (ret) {
ERROR("Failed to apply multicast MAC addresses");
return ret;
}
}
/* VLAN filter */
vfc1 = &dev->data->vlan_filter_conf;
vfc2 = &edev->data->vlan_filter_conf;
if (memcmp(vfc1, vfc2, sizeof(struct rte_vlan_filter_conf))) {
uint64_t vbit;
uint64_t ids;
size_t i;
uint16_t vlan_id;
DEBUG("Configuring VLAN filter");
for (i = 0; i < RTE_DIM(vfc1->ids); i++) {
if (vfc1->ids[i] == 0)
continue;
ids = vfc1->ids[i];
while (ids) {
vlan_id = 64 * i;
/* count trailing zeroes */
vbit = ~ids & (ids - 1);
/* clear least significant bit set */
ids ^= (ids ^ (ids - 1)) ^ vbit;
for (; vbit; vlan_id++)
vbit >>= 1;
ret = rte_eth_dev_vlan_filter(
PORT_ID(sdev), vlan_id, 1);
if (ret) {
ERROR("Failed to apply VLAN filter %hu",
vlan_id);
return ret;
}
}
}
} else {
DEBUG("VLAN filter already set");
}
/* rte_flow */
if (TAILQ_EMPTY(&PRIV(dev)->flow_list)) {
DEBUG("rte_flow already set");
} else {
DEBUG("Resetting rte_flow configuration");
ret = rte_flow_flush(PORT_ID(sdev), &ferror);
if (ret) {
fs_flow_complain(&ferror);
return ret;
}
i = 0;
rte_errno = 0;
DEBUG("Configuring rte_flow");
TAILQ_FOREACH(flow, &PRIV(dev)->flow_list, next) {
DEBUG("Creating flow #%" PRIu32, i++);
flow->flows[SUB_ID(sdev)] =
rte_flow_create(PORT_ID(sdev),
flow->rule.attr,
flow->rule.pattern,
flow->rule.actions,
&ferror);
ret = rte_errno;
if (ret)
break;
}
if (ret) {
fs_flow_complain(&ferror);
return ret;
}
}
return 0;
}
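/*
* Unwind a sub-device from its current state down to DEV_UNDEFINED
* and re-arm the hotplug alarm so that it can be probed again later.
*/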
static void
fs_dev_remove(struct sub_device *sdev)
{
int ret;
if (sdev == NULL)
return;
switch (sdev->state) {
case DEV_STARTED:
failsafe_rx_intr_uninstall_subdevice(sdev);
rte_eth_dev_stop(PORT_ID(sdev));
sdev->state = DEV_ACTIVE;
/* fallthrough */
case DEV_ACTIVE:
failsafe_eth_dev_unregister_callbacks(sdev);
rte_eth_dev_close(PORT_ID(sdev));
sdev->state = DEV_PROBED;
/* fallthrough */
case DEV_PROBED:
ret = rte_dev_remove(sdev->dev);
if (ret < 0) {
ERROR("Bus detach failed for sub_device %u",
SUB_ID(sdev));
} else {
rte_eth_dev_release_port(ETH(sdev));
}
sdev->state = DEV_PARSED;
/* fallthrough */
case DEV_PARSED:
case DEV_UNDEFINED:
sdev->state = DEV_UNDEFINED;
sdev->sdev_port_id = RTE_MAX_ETHPORTS;
/* the end */
break;
}
sdev->remove = 0;
failsafe_hotplug_alarm_install(fs_dev(sdev));
}
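/*
* Fold the sub-device statistics into the failsafe accumulator before
* removal, falling back to the last snapshot if the sub-device can no
* longer be queried.
*/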
static void
fs_dev_stats_save(struct sub_device *sdev)
{
struct rte_eth_stats stats;
int err;
/* Attempt to read current stats. */
err = rte_eth_stats_get(PORT_ID(sdev), &stats);
if (err) {
uint64_t timestamp = sdev->stats_snapshot.timestamp;
WARN("Could not access latest statistics from sub-device %d.\n",
SUB_ID(sdev));
if (timestamp != 0)
WARN("Using latest snapshot taken before %"PRIu64" seconds.\n",
(rte_rdtsc() - timestamp) / rte_get_tsc_hz());
}
failsafe_stats_increment
(&PRIV(fs_dev(sdev))->stats_accumulator,
err ? &sdev->stats_snapshot.stats : &stats);
memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot));
}
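/* Return 1 when no Rx or Tx burst is in flight on the sub-device. */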
static inline int
fs_rxtx_clean(struct sub_device *sdev)
{
uint16_t i;
for (i = 0; i < ETH(sdev)->data->nb_rx_queues; i++)
if (FS_ATOMIC_RX(sdev, i))
return 0;
for (i = 0; i < ETH(sdev)->data->nb_tx_queues; i++)
if (FS_ATOMIC_TX(sdev, i))
return 0;
return 1;
}
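/* Unregister the RMV and LSC event callbacks of a sub-device, if any. */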
void
failsafe_eth_dev_unregister_callbacks(struct sub_device *sdev)
{
int ret;
if (sdev == NULL)
return;
if (sdev->rmv_callback) {
ret = rte_eth_dev_callback_unregister(PORT_ID(sdev),
RTE_ETH_EVENT_INTR_RMV,
failsafe_eth_rmv_event_callback,
sdev);
if (ret)
WARN("Failed to unregister RMV callback for sub_device"
" %d", SUB_ID(sdev));
sdev->rmv_callback = 0;
}
if (sdev->lsc_callback) {
ret = rte_eth_dev_callback_unregister(PORT_ID(sdev),
RTE_ETH_EVENT_INTR_LSC,
failsafe_eth_lsc_event_callback,
sdev);
if (ret)
WARN("Failed to unregister LSC callback for sub_device"
" %d", SUB_ID(sdev));
sdev->lsc_callback = 0;
}
}
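/* Remove active sub-devices marked for removal once their datapath is idle. */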
void
failsafe_dev_remove(struct rte_eth_dev *dev)
{
struct sub_device *sdev;
uint8_t i;
FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
if (sdev->remove && fs_rxtx_clean(sdev)) {
if (fs_lock(dev, 1) != 0)
return;
fs_dev_stats_save(sdev);
fs_dev_remove(sdev);
fs_unlock(dev, 1);
}
}
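/*
* Start or stop sub-device Rx queues so that their state matches
* the Rx queue state of the failsafe port after a restart.
*/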
static int
failsafe_eth_dev_rx_queues_sync(struct rte_eth_dev *dev)
{
struct rxq *rxq;
int ret;
uint16_t i;
for (i = 0; i < dev->data->nb_rx_queues; i++) {
rxq = dev->data->rx_queues[i];
if (rxq->info.conf.rx_deferred_start &&
dev->data->rx_queue_state[i] ==
RTE_ETH_QUEUE_STATE_STARTED) {
/*
* The subdevice Rx queue does not launch on device
* start if deferred start flag is set. It needs to be
* started manually in case an appropriate failsafe Rx
* queue has been started earlier.
*/
ret = dev->dev_ops->rx_queue_start(dev, i);
if (ret) {
ERROR("Could not synchronize Rx queue %d", i);
return ret;
}
} else if (dev->data->rx_queue_state[i] ==
RTE_ETH_QUEUE_STATE_STOPPED) {
/*
* The subdevice Rx queue needs to be stopped manually
* in case an appropriate failsafe Rx queue has been
* stopped earlier.
*/
ret = dev->dev_ops->rx_queue_stop(dev, i);
if (ret) {
ERROR("Could not synchronize Rx queue %d", i);
return ret;
}
}
}
return 0;
}
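/*
* Start or stop sub-device Tx queues so that their state matches
* the Tx queue state of the failsafe port after a restart.
*/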
static int
failsafe_eth_dev_tx_queues_sync(struct rte_eth_dev *dev)
{
struct txq *txq;
int ret;
uint16_t i;
for (i = 0; i < dev->data->nb_tx_queues; i++) {
txq = dev->data->tx_queues[i];
if (txq->info.conf.tx_deferred_start &&
dev->data->tx_queue_state[i] ==
RTE_ETH_QUEUE_STATE_STARTED) {
/*
* The subdevice Tx queue does not launch on device
* start if deferred start flag is set. It needs to be
* started manually in case an appropriate failsafe Tx
* queue has been started earlier.
*/
ret = dev->dev_ops->tx_queue_start(dev, i);
if (ret) {
ERROR("Could not synchronize Tx queue %d", i);
return ret;
}
} else if (dev->data->tx_queue_state[i] ==
RTE_ETH_QUEUE_STATE_STOPPED) {
/*
* The subdevice Tx queue needs to be stopped manually
* in case an appropriate failsafe Tx queue has been
* stopped earlier.
*/
ret = dev->dev_ops->tx_queue_stop(dev, i);
if (ret) {
ERROR("Could not synchronize Tx queue %d", i);
return ret;
}
}
}
return 0;
}
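/*
* Bring every sub-device up to the current failsafe port state
* (parsed, probed, configured, started). Sub-devices failing a step
* are marked for removal.
*/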
int
failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
{
struct sub_device *sdev;
uint32_t inactive;
int ret;
uint8_t i;
if (PRIV(dev)->state < DEV_PARSED)
return 0;
ret = failsafe_args_parse_subs(dev);
if (ret)
goto err_remove;
if (PRIV(dev)->state < DEV_PROBED)
return 0;
ret = failsafe_eal_init(dev);
if (ret)
goto err_remove;
if (PRIV(dev)->state < DEV_ACTIVE)
return 0;
inactive = 0;
FOREACH_SUBDEV(sdev, i, dev) {
if (sdev->state == DEV_PROBED) {
inactive |= UINT32_C(1) << i;
ret = eth_dev_flow_isolate_set(dev, sdev);
if (ret) {
ERROR("Could not apply configuration to sub_device %d",
i);
goto err_remove;
}
}
}
ret = dev->dev_ops->dev_configure(dev);
if (ret)
goto err_remove;
FOREACH_SUBDEV(sdev, i, dev) {
if (inactive & (UINT32_C(1) << i)) {
ret = fs_eth_dev_conf_apply(dev, sdev);
if (ret) {
ERROR("Could not apply configuration to sub_device %d",
i);
goto err_remove;
}
}
}
/*
* If new devices have been configured, check if
* the link state has changed.
*/
if (inactive)
dev->dev_ops->link_update(dev, 1);
if (PRIV(dev)->state < DEV_STARTED)
return 0;
ret = dev->dev_ops->dev_start(dev);
if (ret)
goto err_remove;
ret = failsafe_eth_dev_rx_queues_sync(dev);
if (ret)
goto err_remove;
ret = failsafe_eth_dev_tx_queues_sync(dev);
if (ret)
goto err_remove;
return 0;
err_remove:
FOREACH_SUBDEV(sdev, i, dev)
if (sdev->state != PRIV(dev)->state)
sdev->remove = 1;
return ret;
}
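/* Accumulate the port-level and per-queue counters of 'from' into 'to'. */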
void
failsafe_stats_increment(struct rte_eth_stats *to, struct rte_eth_stats *from)
{
uint32_t i;
RTE_ASSERT(to != NULL && from != NULL);
to->ipackets += from->ipackets;
to->opackets += from->opackets;
to->ibytes += from->ibytes;
to->obytes += from->obytes;
to->imissed += from->imissed;
to->ierrors += from->ierrors;
to->oerrors += from->oerrors;
to->rx_nombuf += from->rx_nombuf;
for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS; i++) {
to->q_ipackets[i] += from->q_ipackets[i];
to->q_opackets[i] += from->q_opackets[i];
to->q_ibytes[i] += from->q_ibytes[i];
to->q_obytes[i] += from->q_obytes[i];
to->q_errors[i] += from->q_errors[i];
}
}
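/*
* RMV event handler: switch the datapath away from the removed
* sub-device and mark it for asynchronous removal.
*/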
int
failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused,
enum rte_eth_event_type event __rte_unused,
void *cb_arg, void *out __rte_unused)
{
struct sub_device *sdev = cb_arg;
fs_lock(fs_dev(sdev), 0);
/* Switch tx_dev away from the removed sub-device as soon as possible. */
fs_switch_dev(fs_dev(sdev), sdev);
/* Use safe bursts in any case. */
failsafe_set_burst_fn(fs_dev(sdev), 1);
/*
* Removal is asynchronous: the sub-PMD will try to unregister
* this callback, which cannot be done from within the callback's
* own thread context.
*/
sdev->remove = 1;
fs_unlock(fs_dev(sdev), 0);
return 0;
}
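/*
* LSC event handler: update the failsafe link status and pass
* the LSC event on to the application when needed.
*/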
int
failsafe_eth_lsc_event_callback(uint16_t port_id __rte_unused,
enum rte_eth_event_type event __rte_unused,
void *cb_arg, void *out __rte_unused)
{
struct rte_eth_dev *dev = cb_arg;
int ret;
ret = dev->dev_ops->link_update(dev, 0);
/* We must pass on the LSC event */
if (ret)
return _rte_eth_dev_callback_process(dev,
RTE_ETH_EVENT_INTR_LSC,
NULL);
else
return 0;
}
/* Take sub-device ownership before it becomes exposed to the application. */
int
failsafe_eth_new_event_callback(uint16_t port_id,
enum rte_eth_event_type event __rte_unused,
void *cb_arg, void *out __rte_unused)
{
struct rte_eth_dev *fs_dev = cb_arg;
struct sub_device *sdev;
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
uint8_t i;
FOREACH_SUBDEV_STATE(sdev, i, fs_dev, DEV_PARSED) {
if (sdev->state >= DEV_PROBED)
continue;
if (dev->device == NULL) {
WARN("Trying to probe malformed device %s.\n",
sdev->devargs.name);
continue;
}
if (strcmp(sdev->devargs.name, dev->device->name) != 0)
continue;
rte_eth_dev_owner_set(port_id, &PRIV(fs_dev)->my_owner);
/* The actual owner will be checked after the port probing. */
break;
}
return 0;
}