numam-dpdk/drivers/net/failsafe/failsafe_private.h
Matan Azrad 7fda13d3a5 net/failsafe: fix sub-device ownership race
There is a time window between the probing of a sub-device port by the
sub-device PMD and the taking of that port's ownership by a fail-safe port.

During this time, the port is available for use by the application. For
example, the port is exposed to applications that use the
RTE_ETH_FOREACH_DEV iterator.

Thus, ownership-unaware applications may manage the port during this time,
which may cause many problematic behaviors in the fail-safe
sub-device initialization.

Register to the ethdev NEW event to take the sub-device port ownership
before it becomes exposed to the application.

Fixes: a46f8d584e ("net/failsafe: add fail-safe PMD")
Cc: stable@dpdk.org

Signed-off-by: Matan Azrad <matan@mellanox.com>
Acked-by: Gaetan Rivet <gaetan.rivet@6wind.com>
Reviewed-by: Stephen Hemminger <stephen@networkplumber.org>
2018-05-14 22:31:53 +01:00

482 lines
12 KiB
C

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2017 6WIND S.A.
* Copyright 2017 Mellanox Technologies, Ltd
*/
#ifndef _RTE_ETH_FAILSAFE_PRIVATE_H_
#define _RTE_ETH_FAILSAFE_PRIVATE_H_
#include <sys/queue.h>
#include <pthread.h>
#include <rte_atomic.h>
#include <rte_dev.h>
#include <rte_ethdev_driver.h>
#include <rte_devargs.h>
#include <rte_interrupts.h>
/* Human-readable name of the driver. */
#define FAILSAFE_DRIVER_NAME "Fail-safe PMD"
/*
 * Name string for port ownership.
 * NOTE(review): presumably stored in fs_priv::my_owner — confirm at the
 * place where the owner is created.
 */
#define FAILSAFE_OWNER_NAME "Fail-safe"
/* Device-argument keys; both appear in PMD_FAILSAFE_PARAM_STRING below. */
#define PMD_FAILSAFE_MAC_KVARG "mac"
#define PMD_FAILSAFE_HOTPLUG_POLL_KVARG "hotplug_poll"
/*
 * Description of the accepted device arguments, built by string-literal
 * concatenation: dev(), exec() and fd() sub-device definitions plus the
 * "mac" and "hotplug_poll" key/value pairs.
 * The trailing "" lets every real entry end with a line continuation.
 */
#define PMD_FAILSAFE_PARAM_STRING \
"dev(<ifc>)," \
"exec(<shell command>)," \
"fd(<fd number>)," \
"mac=mac_addr," \
"hotplug_poll=u64" \
""
/*
 * Default hot-plug timeout, in milliseconds according to the name.
 * NOTE(review): presumably the initial value of the hotplug_poll global —
 * confirm in the .c files.
 */
#define FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS 2000
/* Size of the per-flow sub-flow array (struct rte_flow::flows). */
#define FAILSAFE_MAX_ETHPORTS 2
/* Size of fs_priv::mac_addrs and fs_priv::mac_addr_pool. */
#define FAILSAFE_MAX_ETHADDR 128
/* Not used in this header; presumably a devargs buffer length — confirm. */
#define DEVARGS_MAXLEN 4096
/*
 * Progress of the Rx proxy service, stored in struct rx_proxy::sstate.
 * SS_NO_SERVICE is explicitly 0, so a zero-filled rx_proxy starts there.
 * NOTE(review): the exact meaning of the later steps is inferred from their
 * names only — confirm against the Rx interrupt proxy implementation.
 */
enum rxp_service_state {
SS_NO_SERVICE = 0,
SS_REGISTERED,
SS_READY,
SS_RUNNING,
};
/* TYPES */
/*
 * Bookkeeping of the Rx interrupt/event proxy of a fail-safe port.
 * Embedded in struct fs_priv as the rxp member.
 */
struct rx_proxy {
/* epoll file descriptor */
int efd;
/* event vector to be used by epoll */
struct rte_epoll_event *evec;
/* rte service id */
uint32_t sid;
/* service core id */
uint32_t scid;
/* current step of the proxy service, see enum rxp_service_state */
enum rxp_service_state sstate;
};
/*
 * Fail-safe Rx queue.
 * FS_ATOMIC_RX() casts the entries of the fail-safe port's rx_queues[]
 * array to this type.
 */
struct rxq {
/* fail-safe private data; presumably of the port owning this queue */
struct fs_priv *priv;
/* queue id */
uint16_t qid;
/* next sub_device to poll */
struct sub_device *sdev;
unsigned int socket_id;
/*
 * NOTE(review): presumably the event-fd registered with the EAL for this
 * queue (see the Rx proxy description in struct fs_priv) — confirm.
 */
int event_fd;
unsigned int enable_events:1;
struct rte_eth_rxq_info info;
/*
 * Flexible array of counters, indexed by the sub-device sid
 * (see FS_ATOMIC_RX). Its length is fixed by whoever allocates the queue.
 */
rte_atomic64_t refcnt[];
};
/*
 * Fail-safe Tx queue.
 * FS_ATOMIC_TX() casts the entries of the fail-safe port's tx_queues[]
 * array to this type.
 */
struct txq {
/* fail-safe private data; presumably of the port owning this queue */
struct fs_priv *priv;
/* queue id */
uint16_t qid;
unsigned int socket_id;
struct rte_eth_txq_info info;
/*
 * Flexible array of counters, indexed by the sub-device sid
 * (see FS_ATOMIC_TX). Its length is fixed by whoever allocates the queue.
 */
rte_atomic64_t refcnt[];
};
/*
 * Fail-safe flow rule: one entry of fs_priv::flow_list, holding the
 * corresponding flow handles of the sub-devices.
 */
struct rte_flow {
/* linkage in fs_priv::flow_list */
TAILQ_ENTRY(rte_flow) next;
/* sub_flows; presumably one slot per sub-device — confirm the indexing */
struct rte_flow *flows[FAILSAFE_MAX_ETHPORTS];
/* flow description for synchronization */
struct rte_flow_desc *fd;
};
/*
 * Device state, used both for sub-devices (sub_device::state) and for the
 * fail-safe port itself (fs_priv::state).
 * The declaration order matters: states are compared with >= and <
 * (see fs_find_next() and TX_SUBDEV()), so enumerators must stay sorted
 * from least to most advanced.
 */
enum dev_state {
DEV_UNDEFINED,
DEV_PARSED,
DEV_PROBED,
DEV_ACTIVE,
DEV_STARTED,
};
/*
 * Statistics snapshot together with the moment it was taken.
 * Used as sub_device::stats_snapshot.
 */
struct fs_stats {
struct rte_eth_stats stats;
/* NOTE(review): time base and unit are not visible in this header */
uint64_t timestamp;
};
/*
 * One sub-device aggregated by a fail-safe port.
 */
struct sub_device {
	/* Exhaustive DPDK device description */
	struct sub_device *next;
	struct rte_devargs devargs;
	struct rte_bus *bus;
	struct rte_device *dev;
	struct rte_eth_dev *edev;
	/* Sub-device id; indexes the per-queue refcnt[] arrays */
	uint8_t sid;
	/* Device state machine */
	enum dev_state state;
	/* Last stats snapshot passed to user */
	struct fs_stats stats_snapshot;
	/* Some devices are defined as a command line */
	char *cmdline;
	/* Others are retrieved through a file descriptor */
	char *fd_str;
	/* fail-safe device backreference */
	struct rte_eth_dev *fs_dev;
	/* flag calling for recollection */
	volatile unsigned int remove:1;
	/*
	 * flow isolation state.
	 * Kept unsigned: a signed one-bit field can only hold 0 and -1, so
	 * storing 1 would read back as -1. Must keep the same type as
	 * fs_priv::flow_isolated so both can be compared directly.
	 */
	unsigned int flow_isolated:1;
};

/*
 * Private data of a fail-safe port, reached through PRIV(dev).
 */
struct fs_priv {
	/* NOTE(review): presumably the fail-safe ethdev itself — confirm */
	struct rte_eth_dev *dev;
	/*
	 * Set of sub_devices.
	 * subs[0] is the preferred device
	 * any other is just another slave
	 */
	struct sub_device *subs;
	uint8_t subs_head; /* if head == tail, no subs */
	uint8_t subs_tail; /* first invalid */
	uint8_t subs_tx; /* current emitting device */
	uint8_t current_probed;
	/* flow mapping */
	TAILQ_HEAD(sub_flows, rte_flow) flow_list;
	/* current number of mac_addr slots allocated. */
	uint32_t nb_mac_addr;
	struct ether_addr mac_addrs[FAILSAFE_MAX_ETHADDR];
	uint32_t mac_addr_pool[FAILSAFE_MAX_ETHADDR];
	/* current capabilities */
	struct rte_eth_dev_info infos;
	struct rte_eth_dev_owner my_owner; /* Unique owner. */
	struct rte_intr_handle intr_handle; /* Port interrupt handle. */
	/*
	 * Fail-safe state machine.
	 * This level will be tracking state of the EAL and eth
	 * layer at large as defined by the user application.
	 * It will then steer the sub_devices toward the same
	 * synchronized state.
	 */
	enum dev_state state;
	struct rte_eth_stats stats_accumulator;
	/*
	 * Rx interrupts/events proxy.
	 * The PMD issues Rx events to the EAL on behalf of its subdevices,
	 * it does that by registering an event-fd for each of its queues with
	 * the EAL. A PMD service thread listens to all the Rx events from the
	 * subdevices, when an Rx event is issued by a subdevice it will be
	 * caught by this service which will trigger an Rx event in the
	 * appropriate failsafe Rx queue.
	 */
	struct rx_proxy rxp;
	/* Taken through fs_lock() and released through fs_unlock() */
	pthread_mutex_t hotplug_mutex;
	/* Hot-plug mutex is locked by the alarm mechanism. */
	volatile unsigned int alarm_lock:1;
	unsigned int pending_alarm:1; /* An alarm is pending */
	/*
	 * flow isolation state.
	 * Kept unsigned: a signed one-bit field can only hold 0 and -1, so
	 * storing 1 would read back as -1. Must keep the same type as
	 * sub_device::flow_isolated so both can be compared directly.
	 */
	unsigned int flow_isolated:1;
};
/* FAILSAFE_INTR */
/*
 * Rx interrupt proxy set-up and tear-down, either for the whole fail-safe
 * port (dev) or for one sub-device (sdev). Implemented outside this header.
 * NOTE(review): the int-returning variants presumably use the
 * 0 / negative-errno convention — confirm against the implementation.
 */
int failsafe_rx_intr_install(struct rte_eth_dev *dev);
void failsafe_rx_intr_uninstall(struct rte_eth_dev *dev);
int failsafe_rx_intr_install_subdevice(struct sub_device *sdev);
void failsafe_rx_intr_uninstall_subdevice(struct sub_device *sdev);
/* MISC */
/*
 * Hot-plug alarm management for a fail-safe port.
 * NOTE(review): presumably tied to fs_priv::pending_alarm — confirm.
 */
int failsafe_hotplug_alarm_install(struct rte_eth_dev *dev);
int failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev);
/* RX / TX */
/*
 * Select the burst functions of the fail-safe port; called by
 * fs_switch_dev() with force_safe set to 0.
 * NOTE(review): presumably picks between the regular and the *_fast
 * variants declared below — confirm.
 */
void set_burst_fn(struct rte_eth_dev *dev, int force_safe);
/*
 * Burst entry points. The queue is passed as an opaque pointer;
 * presumably a struct rxq * / struct txq * — confirm.
 */
uint16_t failsafe_rx_burst(void *rxq,
struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
uint16_t failsafe_tx_burst(void *txq,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
uint16_t failsafe_rx_burst_fast(void *rxq,
struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
uint16_t failsafe_tx_burst_fast(void *txq,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
/* ARGS */
/*
 * Device-argument handling; params is the textual parameter list
 * (format described by PMD_FAILSAFE_PARAM_STRING).
 */
int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
void failsafe_args_free(struct rte_eth_dev *dev);
int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
int failsafe_args_parse_subs(struct rte_eth_dev *dev);
/* EAL */
int failsafe_eal_init(struct rte_eth_dev *dev);
int failsafe_eal_uninit(struct rte_eth_dev *dev);
/* ETH_DEV */
int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
void failsafe_dev_remove(struct rte_eth_dev *dev);
/*
 * NOTE(review): by its name and parameters, adds the counters of 'from'
 * into 'to' — confirm.
 */
void failsafe_stats_increment(struct rte_eth_stats *to,
struct rte_eth_stats *from);
/*
 * Event callbacks taking (port id, event type, user argument, output
 * pointer).
 * NOTE(review): presumably registered with the ethdev event callback API
 * for the removal (RMV), link-status-change (LSC) and NEW events
 * respectively — confirm at the registration sites.
 */
int failsafe_eth_rmv_event_callback(uint16_t port_id,
enum rte_eth_event_type type,
void *arg, void *out);
int failsafe_eth_lsc_event_callback(uint16_t port_id,
enum rte_eth_event_type event,
void *cb_arg, void *out);
int failsafe_eth_new_event_callback(uint16_t port_id,
enum rte_eth_event_type event,
void *cb_arg, void *out);
/* GLOBALS */
/* Declared here, defined once in the driver's .c files. */
extern const char pmd_failsafe_driver_name[];
/* ethdev operations table of the fail-safe port */
extern const struct eth_dev_ops failsafe_ops;
/* rte_flow operations table of the fail-safe port */
extern const struct rte_flow_ops fs_flow_ops;
/*
 * NOTE(review): presumably the value of the "hotplug_poll" device
 * argument — confirm in the argument parser.
 */
extern uint64_t hotplug_poll;
/*
 * NOTE(review): presumably set when the "mac" device argument was
 * given — confirm in the argument parser.
 */
extern int mac_from_arg;
/* HELPERS */
/*
 * Fail-safe private data of an ethdev.
 * dev: (struct rte_eth_dev *) fail-safe device
 * Plain cast of dev->data->dev_private; no type check is possible, so it
 * must only be applied to fail-safe ports.
 */
#define PRIV(dev) \
((struct fs_priv *)(dev)->data->dev_private)
/*
 * ethdev of a sub-device.
 * sdev: (struct sub_device *)
 */
#define ETH(sdev) \
((sdev)->edev)
/*
 * ethdev port id of a sub-device; dereferences ETH(sdev), which must
 * therefore be non-NULL.
 * sdev: (struct sub_device *)
 */
#define PORT_ID(sdev) \
(ETH(sdev)->data->port_id)
/*
 * Sub-device id (the sid member).
 * sdev: (struct sub_device *)
 */
#define SUB_ID(sdev) \
((sdev)->sid)
/**
 * Stateful iterator construct over fail-safe sub-devices:
 *   s: (struct sub_device *), iterator
 *   i: (uint8_t), increment
 *   dev: (struct rte_eth_dev *), fail-safe ethdev
 *   state: (enum dev_state), minimum acceptable device state
 *
 * Expands to a for statement header: only sub-devices whose state is at
 * least 'state' are visited, and 'i' holds the index of the current one.
 * When the loop runs to completion 's' is NULL.
 * 's' and 'i' must be modifiable lvalues; every argument is evaluated
 * several times, so none of them may have side effects.
 */
#define FOREACH_SUBDEV_STATE(s, i, dev, state)				\
	for ((s) = fs_find_next((dev), 0, (state), &(i));		\
	     (s) != NULL;						\
	     (s) = fs_find_next((dev), (i) + 1, (state), &(i)))
/**
 * Iterator construct over fail-safe sub-devices:
 *   s: (struct sub_device *), iterator
 *   i: (uint8_t), increment
 *   dev: (struct rte_eth_dev *), fail-safe ethdev
 *
 * Same as FOREACH_SUBDEV_STATE() with DEV_UNDEFINED as the minimum state,
 * i.e. no sub-device is filtered out.
 */
#define FOREACH_SUBDEV(s, i, dev)					\
	FOREACH_SUBDEV_STATE(s, i, dev, DEV_UNDEFINED)
/*
 * Preferred sub-device of a fail-safe port: the first entry of subs[].
 * dev: (struct rte_eth_dev *) fail-safe device
 */
#define PREFERRED_SUBDEV(dev) \
	(&PRIV(dev)->subs[0])
/*
 * Sub-device currently used for emission, or NULL when subs_tx does not
 * designate a valid entry (subs_tx >= subs_tail) or when the designated
 * sub-device has not reached DEV_PROBED.
 * dev: (struct rte_eth_dev *) fail-safe device
 * 'dev' is evaluated several times and must be free of side effects.
 */
#define TX_SUBDEV(dev) \
	((PRIV(dev)->subs_tx >= PRIV(dev)->subs_tail || \
	  PRIV(dev)->subs[PRIV(dev)->subs_tx].state < DEV_PROBED) ? NULL \
	 : &PRIV(dev)->subs[PRIV(dev)->subs_tx])
/**
 * Access one operation of the sub-device's ethdev operations table.
 * s: (struct sub_device *)
 * ops: (struct eth_dev_ops) member
 *
 * Expands to ETH(s)->dev_ops-><ops>; the result is the function pointer
 * itself, not a call. ETH(s) must be non-NULL.
 */
#define SUBOPS(s, ops) \
(ETH(s)->dev_ops->ops)
/**
 * Atomic guard
 *
 * Helpers around the rte_atomic64_t counters kept per queue and per
 * sub-device (rxq::refcnt[] / txq::refcnt[]).
 * NOTE(review): presumably raised around burst calls so that other code
 * can tell a sub-device is in use — confirm in the Rx/Tx path.
 */
/**
 * Set the guard: stores 1 (it is a plain set, not an increment).
 * a: (rte_atomic64_t)
 */
#define FS_ATOMIC_P(a) \
rte_atomic64_set(&(a), 1)
/**
 * Clear the guard: stores 0.
 * a: (rte_atomic64_t)
 */
#define FS_ATOMIC_V(a) \
rte_atomic64_set(&(a), 0)
/**
 * Read the guard of sub-device s on Rx queue i of its fail-safe port.
 * s: (struct sub_device *)
 * i: uint16_t qid
 *
 * Goes through s->fs_dev, so the back-reference must be set.
 */
#define FS_ATOMIC_RX(s, i) \
rte_atomic64_read( \
&((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid] \
)
/**
 * Read the guard of sub-device s on Tx queue i of its fail-safe port.
 * s: (struct sub_device *)
 * i: uint16_t qid
 *
 * Goes through s->fs_dev, so the back-reference must be set.
 */
#define FS_ATOMIC_TX(s, i) \
rte_atomic64_read( \
&((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid] \
)
/*
 * How to print the value of pthread_self() in log messages:
 * FS_THREADID_TYPE is the type it is cast to and FS_THREADID_FMT the
 * matching printf conversion WITHOUT the leading '%'
 * (callers write "%" FS_THREADID_FMT).
 * With RTE_EXEC_ENV_BSDAPP defined the id is printed as a pointer,
 * otherwise as an unsigned long.
 */
#ifdef RTE_EXEC_ENV_BSDAPP
#define FS_THREADID_TYPE void*
#define FS_THREADID_FMT "p"
#else
#define FS_THREADID_TYPE unsigned long
#define FS_THREADID_FMT "lu"
#endif
extern int failsafe_logtype;
#define LOG__(l, m, ...) \
rte_log(RTE_LOG_ ## l, failsafe_logtype, \
"net_failsafe: " m "%c", __VA_ARGS__)
#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
#define DEBUG(...) LOG_(DEBUG, __VA_ARGS__)
#define INFO(...) LOG_(INFO, __VA_ARGS__)
#define WARN(...) LOG_(WARNING, __VA_ARGS__)
#define ERROR(...) LOG_(ERR, __VA_ARGS__)
/* inlined functions */
static inline struct sub_device *
fs_find_next(struct rte_eth_dev *dev,
uint8_t sid,
enum dev_state min_state,
uint8_t *sid_out)
{
struct sub_device *subs;
uint8_t tail;
subs = PRIV(dev)->subs;
tail = PRIV(dev)->subs_tail;
while (sid < tail) {
if (subs[sid].state >= min_state)
break;
sid++;
}
*sid_out = sid;
if (sid >= tail)
return NULL;
return &subs[sid];
}
/*
* Lock hot-plug mutex.
* is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
*/
static inline int
fs_lock(struct rte_eth_dev *dev, unsigned int is_alarm)
{
int ret;
if (is_alarm) {
ret = pthread_mutex_trylock(&PRIV(dev)->hotplug_mutex);
if (ret) {
DEBUG("Hot-plug mutex lock trying failed(%s), will try"
" again later...", strerror(ret));
return ret;
}
PRIV(dev)->alarm_lock = 1;
} else {
ret = pthread_mutex_lock(&PRIV(dev)->hotplug_mutex);
if (ret) {
ERROR("Cannot lock mutex(%s)", strerror(ret));
return ret;
}
}
DEBUG("Hot-plug mutex was locked by thread %" FS_THREADID_FMT "%s",
(FS_THREADID_TYPE)pthread_self(),
PRIV(dev)->alarm_lock ? " by the hot-plug alarm" : "");
return ret;
}
/*
* Unlock hot-plug mutex.
* is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
*/
static inline void
fs_unlock(struct rte_eth_dev *dev, unsigned int is_alarm)
{
int ret;
unsigned int prev_alarm_lock = PRIV(dev)->alarm_lock;
if (is_alarm) {
RTE_ASSERT(PRIV(dev)->alarm_lock == 1);
PRIV(dev)->alarm_lock = 0;
}
ret = pthread_mutex_unlock(&PRIV(dev)->hotplug_mutex);
if (ret)
ERROR("Cannot unlock hot-plug mutex(%s)", strerror(ret));
else
DEBUG("Hot-plug mutex was unlocked by thread %" FS_THREADID_FMT "%s",
(FS_THREADID_TYPE)pthread_self(),
prev_alarm_lock ? " by the hot-plug alarm" : "");
}
/*
* Switch emitting device.
* If banned is set, banned must not be considered for
* the role of emitting device.
*/
static inline void
fs_switch_dev(struct rte_eth_dev *dev,
struct sub_device *banned)
{
struct sub_device *txd;
enum dev_state req_state;
req_state = PRIV(dev)->state;
txd = TX_SUBDEV(dev);
if (PREFERRED_SUBDEV(dev)->state >= req_state &&
PREFERRED_SUBDEV(dev) != banned) {
if (txd != PREFERRED_SUBDEV(dev) &&
(txd == NULL ||
(req_state == DEV_STARTED) ||
(txd && txd->state < DEV_STARTED))) {
DEBUG("Switching tx_dev to preferred sub_device");
PRIV(dev)->subs_tx = 0;
}
} else if ((txd && txd->state < req_state) ||
txd == NULL ||
txd == banned) {
struct sub_device *sdev = NULL;
uint8_t i;
/* Using acceptable device */
FOREACH_SUBDEV_STATE(sdev, i, dev, req_state) {
if (sdev == banned)
continue;
DEBUG("Switching tx_dev to sub_device %d",
i);
PRIV(dev)->subs_tx = i;
break;
}
if (i >= PRIV(dev)->subs_tail || sdev == NULL) {
DEBUG("No device ready, deactivating tx_dev");
PRIV(dev)->subs_tx = PRIV(dev)->subs_tail;
}
} else {
return;
}
set_burst_fn(dev, 0);
rte_wmb();
}
/*
 * Adjust a sub-device error value, and rte_errno, to what the fail-safe
 * port should report.
 *
 * sdev: sub-device the operation was run on.
 * err: value returned by that operation.
 *
 * Returns 0 and clears rte_errno when the sub-device is flagged for
 * removal or when err is -EIO; otherwise returns err and leaves rte_errno
 * untouched.
 */
static inline int
fs_err(struct sub_device *sdev, int err)
{
	/* A device removal shouldn't be reported as an error. */
	int removed = (sdev->remove == 1);

	if (removed || err == -EIO) {
		rte_errno = 0;
		return 0;
	}
	return err;
}
#endif /* _RTE_ETH_FAILSAFE_PRIVATE_H_ */