numam-dpdk/drivers/net/failsafe/failsafe_flow.c
Matan Azrad 655fcd68c7 net/failsafe: fix hotplug races
Fail-safe uses a periodic alarm mechanism, running from the host
thread, to manage the hot-plug events of its sub-devices. This
management requires many sub-device PMD operations
(stop, close, start, configure, etc.).

While the hot-plug alarm runs in the host thread, the application may
call fail-safe operations which trigger the same sub-device PMD
operations directly. Such a call may come from any thread the
application chooses (typically the master thread).

Thus, more than one operation can be executed on a sub-device at the
same time, which can trigger many races inside the sub-PMDs.

Moreover, some control operations update the fail-safe internal
databases, which the alarm mechanism may be reading at the same time.
This too can lead to races and crashes.

Fail-safe is the owner of its sub-devices and must synchronize their
use according to the ETHDEV ownership rules.

Synchronize the hot-plug management with a new lock mechanism: a mutex
atomically protects each critical section of the fail-safe hot-plug
mechanism and of the control operations, preventing any races between
them.

Fixes: a46f8d5 ("net/failsafe: add fail-safe PMD")
Cc: stable@dpdk.org

Signed-off-by: Matan Azrad <matan@mellanox.com>
Acked-by: Gaetan Rivet <gaetan.rivet@6wind.com>
2018-02-13 18:17:30 +01:00
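
The fs_lock()/fs_unlock() helpers used throughout the file below are
provided by failsafe_private.h. As a rough illustration of the mechanism
the commit message describes (a minimal sketch, not the actual DPDK
implementation), the helpers can be thought of as wrappers around a
pthread mutex kept in the fail-safe private data; the field name
hotplug_mutex and the trylock path for the alarm context are assumptions
here:

/*
 * Illustrative sketch only: assumes failsafe_private.h declares
 * PRIV(dev)->hotplug_mutex as a pthread_mutex_t.
 */
#include <pthread.h>

static inline int
fs_lock(struct rte_eth_dev *dev, unsigned int is_alarm)
{
        if (is_alarm) {
                /* The periodic alarm must not block the host thread;
                 * if the lock is busy it simply retries on its next run.
                 */
                return pthread_mutex_trylock(&PRIV(dev)->hotplug_mutex);
        }
        /* Control operations (application threads) wait for the lock. */
        return pthread_mutex_lock(&PRIV(dev)->hotplug_mutex);
}

static inline void
fs_unlock(struct rte_eth_dev *dev, unsigned int is_alarm)
{
        (void)is_alarm;
        pthread_mutex_unlock(&PRIV(dev)->hotplug_mutex);
}

A non-blocking trylock on the alarm side is one plausible design choice,
since the alarm is periodic and can defer its work, while application
control calls can afford to wait.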


/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2017 6WIND S.A.
* Copyright 2017 Mellanox.
*/
#include <sys/queue.h>

#include <rte_malloc.h>
#include <rte_tailq.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>

#include "failsafe_private.h"
static struct rte_flow *
fs_flow_allocate(const struct rte_flow_attr *attr,
                 const struct rte_flow_item *items,
                 const struct rte_flow_action *actions)
{
        struct rte_flow *flow;
        size_t fdsz;

        fdsz = rte_flow_copy(NULL, 0, attr, items, actions);
        flow = rte_zmalloc(NULL,
                           sizeof(struct rte_flow) + fdsz,
                           RTE_CACHE_LINE_SIZE);
        if (flow == NULL) {
                ERROR("Could not allocate new flow");
                return NULL;
        }
        flow->fd = (void *)((uintptr_t)flow + sizeof(*flow));
        if (rte_flow_copy(flow->fd, fdsz, attr, items, actions) != fdsz) {
                ERROR("Failed to copy flow description");
                rte_free(flow);
                return NULL;
        }
        return flow;
}
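
/* Free a fail-safe flow and clear the caller's reference to it. */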
static void
fs_flow_release(struct rte_flow **flow)
{
        rte_free(*flow);
        *flow = NULL;
}
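
/*
 * Validate the flow on every active sub-device, under the fail-safe lock
 * to avoid racing with the hot-plug alarm.
 */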
static int
fs_flow_validate(struct rte_eth_dev *dev,
                 const struct rte_flow_attr *attr,
                 const struct rte_flow_item patterns[],
                 const struct rte_flow_action actions[],
                 struct rte_flow_error *error)
{
        struct sub_device *sdev;
        uint8_t i;
        int ret;

        fs_lock(dev, 0);
        FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
                DEBUG("Calling rte_flow_validate on sub_device %d", i);
                ret = rte_flow_validate(PORT_ID(sdev),
                                        attr, patterns, actions, error);
                if ((ret = fs_err(sdev, ret))) {
                        ERROR("Operation rte_flow_validate failed for sub_device %d"
                              " with error %d", i, ret);
                        fs_unlock(dev, 0);
                        return ret;
                }
        }
        fs_unlock(dev, 0);
        return 0;
}
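
/*
 * Create the flow on every active sub-device, then add it to the
 * fail-safe flow list; on failure, destroy any sub-device flows already
 * created.
 */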
static struct rte_flow *
fs_flow_create(struct rte_eth_dev *dev,
               const struct rte_flow_attr *attr,
               const struct rte_flow_item patterns[],
               const struct rte_flow_action actions[],
               struct rte_flow_error *error)
{
        struct sub_device *sdev;
        struct rte_flow *flow;
        uint8_t i;

        fs_lock(dev, 0);
        flow = fs_flow_allocate(attr, patterns, actions);
        if (flow == NULL) {
                fs_unlock(dev, 0);
                return NULL;
        }
        FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
                flow->flows[i] = rte_flow_create(PORT_ID(sdev),
                                                 attr, patterns, actions, error);
                if (flow->flows[i] == NULL && fs_err(sdev, -rte_errno)) {
                        ERROR("Failed to create flow on sub_device %d",
                              i);
                        goto err;
                }
        }
        TAILQ_INSERT_TAIL(&PRIV(dev)->flow_list, flow, next);
        fs_unlock(dev, 0);
        return flow;
err:
        FOREACH_SUBDEV(sdev, i, dev) {
                if (flow->flows[i] != NULL)
                        rte_flow_destroy(PORT_ID(sdev),
                                         flow->flows[i], error);
        }
        fs_flow_release(&flow);
        fs_unlock(dev, 0);
        return NULL;
}
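
/*
 * Destroy the flow on every active sub-device and remove it from the
 * fail-safe flow list; per-sub-device failures are logged and the first
 * error code is returned.
 */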
static int
fs_flow_destroy(struct rte_eth_dev *dev,
                struct rte_flow *flow,
                struct rte_flow_error *error)
{
        struct sub_device *sdev;
        uint8_t i;
        int ret;

        if (flow == NULL) {
                ERROR("Invalid flow");
                return -EINVAL;
        }
        ret = 0;
        fs_lock(dev, 0);
        FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
                int local_ret;

                if (flow->flows[i] == NULL)
                        continue;
                local_ret = rte_flow_destroy(PORT_ID(sdev),
                                             flow->flows[i], error);
                if ((local_ret = fs_err(sdev, local_ret))) {
                        ERROR("Failed to destroy flow on sub_device %d: %d",
                              i, local_ret);
                        if (ret == 0)
                                ret = local_ret;
                }
        }
        TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next);
        fs_flow_release(&flow);
        fs_unlock(dev, 0);
        return ret;
}
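
/*
 * Flush all flows on every active sub-device, then release every flow
 * tracked in the fail-safe flow list.
 */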
static int
fs_flow_flush(struct rte_eth_dev *dev,
              struct rte_flow_error *error)
{
        struct sub_device *sdev;
        struct rte_flow *flow;
        void *tmp;
        uint8_t i;
        int ret;

        fs_lock(dev, 0);
        FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
                DEBUG("Calling rte_flow_flush on sub_device %d", i);
                ret = rte_flow_flush(PORT_ID(sdev), error);
                if ((ret = fs_err(sdev, ret))) {
                        ERROR("Operation rte_flow_flush failed for sub_device %d"
                              " with error %d", i, ret);
                        fs_unlock(dev, 0);
                        return ret;
                }
        }
        TAILQ_FOREACH_SAFE(flow, &PRIV(dev)->flow_list, next, tmp) {
                TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next);
                fs_flow_release(&flow);
        }
        fs_unlock(dev, 0);
        return 0;
}
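
/*
 * Query the flow on the preferred (TX) sub-device only; warn and fail if
 * no sub-device is active.
 */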
static int
fs_flow_query(struct rte_eth_dev *dev,
              struct rte_flow *flow,
              enum rte_flow_action_type type,
              void *arg,
              struct rte_flow_error *error)
{
        struct sub_device *sdev;

        fs_lock(dev, 0);
        sdev = TX_SUBDEV(dev);
        if (sdev != NULL) {
                int ret = rte_flow_query(PORT_ID(sdev),
                                         flow->flows[SUB_ID(sdev)],
                                         type, arg, error);

                if ((ret = fs_err(sdev, ret))) {
                        fs_unlock(dev, 0);
                        return ret;
                }
                fs_unlock(dev, 0);
                return 0;
        }
        fs_unlock(dev, 0);
        WARN("No active sub_device to query about its flow");
        return -1;
}
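
/*
 * Apply the flow isolation mode to every probed sub-device and record it
 * in both the sub-device and fail-safe private states.
 */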
static int
fs_flow_isolate(struct rte_eth_dev *dev,
                int set,
                struct rte_flow_error *error)
{
        struct sub_device *sdev;
        uint8_t i;
        int ret;

        fs_lock(dev, 0);
        FOREACH_SUBDEV(sdev, i, dev) {
                if (sdev->state < DEV_PROBED)
                        continue;
                DEBUG("Calling rte_flow_isolate on sub_device %d", i);
                if (PRIV(dev)->flow_isolated != sdev->flow_isolated)
                        WARN("flow isolation mode of sub_device %d in incoherent state.",
                             i);
                ret = rte_flow_isolate(PORT_ID(sdev), set, error);
                if ((ret = fs_err(sdev, ret))) {
                        ERROR("Operation rte_flow_isolate failed for sub_device %d"
                              " with error %d", i, ret);
                        fs_unlock(dev, 0);
                        return ret;
                }
                sdev->flow_isolated = set;
        }
        PRIV(dev)->flow_isolated = set;
        fs_unlock(dev, 0);
        return 0;
}
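
/* Flow API operations exposed by the fail-safe PMD. */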
const struct rte_flow_ops fs_flow_ops = {
        .validate = fs_flow_validate,
        .create = fs_flow_create,
        .destroy = fs_flow_destroy,
        .flush = fs_flow_flush,
        .query = fs_flow_query,
        .isolate = fs_flow_isolate,
};