net/failsafe: support device removal

Listen to INTR_RMV events issued by slaves.
Add atomic flags on slave queues to detect use of the slave burst functions.
If a removal is detected, set the recollection flag on this slave.

During a slave upkeep round, if its recollection flag is set and its
burst functions are not in use by any thread, remove that slave.

Signed-off-by: Gaetan Rivet <gaetan.rivet@6wind.com>
Acked-by: Olga Shern <olgas@mellanox.com>
This commit is contained in:
Gaetan Rivet 2017-07-18 14:48:20 +02:00 committed by Ferruh Yigit
parent 72a57bfd9a
commit 598fb8aec6
8 changed files with 234 additions and 20 deletions

View File

@ -51,6 +51,12 @@ The Fail-safe PMD only supports a limited set of features. If you plan to use a
device underneath the Fail-safe PMD with a specific feature, this feature must
be supported by the Fail-safe PMD to avoid throwing any error.
A notable exception is the device removal feature. The fail-safe PMD being a
virtual device, it cannot currently be removed in the sense of a specific bus
hotplug, like for PCI for example. It will however enable this feature for its
sub-devices automatically, detecting those that are capable and registering the
relevant callback for such an event.
Check the feature matrix for the complete set of supported features.
Compilation option
@ -179,6 +185,14 @@ apply it upon the probing of its missing sub-device. After this configuration
pass, the new sub-device will be synchronized with other sub-devices, i.e. be
started if the fail-safe PMD has been started by the user before.
Plug-out feature
----------------
A sub-device supporting the device removal event can be removed from its bus at
any time. The fail-safe PMD will register a callback for such event and react
accordingly. It will try to safely stop, close and uninit the sub-device having
emitted this event, allowing it to free its eventual resources.
Fail-safe glossary
------------------
@ -195,6 +209,7 @@ Preferred device : Primary device
Upkeep round
Periodic process during which slaves are serviced. Each device having a state
different from that of the fail-safe device itself is synchronized with it.
Additionally, each slave having the remove flag set is cleaned up.
Slave
In the context of the fail-safe PMD, synonymous to sub-device.

View File

@ -142,6 +142,7 @@ fs_hotplug_alarm(void *arg)
if (ret)
ERROR("Unable to synchronize sub_device state");
}
failsafe_dev_remove(dev);
ret = failsafe_hotplug_alarm_install(dev);
if (ret)
ERROR("Unable to set up next alarm");

View File

@ -436,6 +436,17 @@ failsafe_args_count_subdevice(struct rte_eth_dev *dev,
dev, params);
}
static int
fs_parse_sub_device(struct sub_device *sdev)
{
struct rte_devargs *da;
char devstr[DEVARGS_MAXLEN] = "";
da = &sdev->devargs;
snprintf(devstr, sizeof(devstr), "%s,%s", da->name, da->args);
return fs_parse_device(sdev, devstr);
}
int
failsafe_args_parse_subs(struct rte_eth_dev *dev)
{
@ -448,6 +459,8 @@ failsafe_args_parse_subs(struct rte_eth_dev *dev)
continue;
if (sdev->cmdline)
ret = fs_execute_cmd(sdev, sdev->cmdline);
else
ret = fs_parse_sub_device(sdev);
if (ret == 0)
sdev->state = DEV_PARSED;
}

View File

@ -63,6 +63,7 @@ fs_bus_init(struct rte_eth_dev *dev)
return -ENODEV;
}
SUB_ID(sdev) = i;
sdev->fs_dev = dev;
sdev->dev = ETH(sdev)->device;
ETH(sdev)->state = RTE_ETH_DEV_DEFERRED;
sdev->state = DEV_PROBED;
@ -80,7 +81,7 @@ failsafe_eal_init(struct rte_eth_dev *dev)
return ret;
if (PRIV(dev)->state < DEV_PROBED)
PRIV(dev)->state = DEV_PROBED;
fs_switch_dev(dev);
fs_switch_dev(dev, NULL);
return 0;
}

View File

@ -250,6 +250,67 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
return 0;
}
/*
 * Tear down a sub-device after a removal event.
 *
 * The switch deliberately falls through: teardown starts at the slave's
 * current state and walks every lower state until DEV_UNDEFINED, so a
 * DEV_STARTED slave is stopped, closed and detached in one pass.
 */
static void
fs_dev_remove(struct sub_device *sdev)
{
	int ret;

	if (sdev == NULL)
		return;
	switch (sdev->state) {
	case DEV_STARTED:
		rte_eth_dev_stop(PORT_ID(sdev));
		sdev->state = DEV_ACTIVE;
		/* fallthrough */
	case DEV_ACTIVE:
		rte_eth_dev_close(PORT_ID(sdev));
		sdev->state = DEV_PROBED;
		/* fallthrough */
	case DEV_PROBED:
		/* Detach the underlying device from its bus. */
		ret = rte_eal_hotplug_remove(sdev->bus->name,
					     sdev->dev->name);
		if (ret) {
			ERROR("Bus detach failed for sub_device %u",
			      SUB_ID(sdev));
		} else {
			ETH(sdev)->state = RTE_ETH_DEV_UNUSED;
		}
		/* State is downgraded even on detach failure so the slave
		 * is not torn down again on the next upkeep round. */
		sdev->state = DEV_PARSED;
		/* fallthrough */
	case DEV_PARSED:
	case DEV_UNDEFINED:
		sdev->state = DEV_UNDEFINED;
		/* the end */
		break;
	}
	/* Re-arm the hotplug alarm: the slave may be plugged back later
	 * and will then be re-synchronized by the upkeep rounds. */
	failsafe_hotplug_alarm_install(sdev->fs_dev);
}
/*
 * Check whether any datapath thread is currently inside one of this
 * slave's burst functions.
 *
 * Returns 1 when every RX and TX queue guard of the slave reads zero
 * (safe to remove the slave), 0 as soon as one guard is held.
 */
static inline int
fs_rxtx_clean(struct sub_device *sdev)
{
	uint16_t qid;

	for (qid = 0; qid < ETH(sdev)->data->nb_rx_queues; qid++) {
		if (FS_ATOMIC_RX(sdev, qid) != 0)
			return 0;
	}
	for (qid = 0; qid < ETH(sdev)->data->nb_tx_queues; qid++) {
		if (FS_ATOMIC_TX(sdev, qid) != 0)
			return 0;
	}
	return 1;
}
void
failsafe_dev_remove(struct rte_eth_dev *dev)
{
struct sub_device *sdev;
uint8_t i;
FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
if (sdev->remove && fs_rxtx_clean(sdev))
fs_dev_remove(sdev);
}
int
failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
{
@ -263,13 +324,13 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
ret = failsafe_args_parse_subs(dev);
if (ret)
return ret;
goto err_remove;
if (PRIV(dev)->state < DEV_PROBED)
return 0;
ret = failsafe_eal_init(dev);
if (ret)
return ret;
goto err_remove;
if (PRIV(dev)->state < DEV_ACTIVE)
return 0;
inactive = 0;
@ -278,15 +339,14 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
inactive |= UINT32_C(1) << i;
ret = dev->dev_ops->dev_configure(dev);
if (ret)
return ret;
goto err_remove;
FOREACH_SUBDEV(sdev, i, dev) {
if (inactive & (UINT32_C(1) << i)) {
ret = fs_eth_dev_conf_apply(dev, sdev);
if (ret) {
ERROR("Could not apply configuration to sub_device %d",
i);
/* TODO: disable device */
return ret;
goto err_remove;
}
}
}
@ -300,6 +360,30 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
return 0;
ret = dev->dev_ops->dev_start(dev);
if (ret)
return ret;
goto err_remove;
return 0;
err_remove:
FOREACH_SUBDEV(sdev, i, dev)
if (sdev->state != PRIV(dev)->state)
sdev->remove = 1;
return ret;
}
/*
 * RTE_ETH_EVENT_INTR_RMV callback registered on each capable slave.
 *
 * Runs in the interrupt thread context of the sub-PMD; it only flags
 * the slave for removal — the actual teardown happens asynchronously
 * in failsafe_dev_remove() during the next upkeep round.
 */
int
failsafe_eth_rmv_event_callback(uint8_t port_id __rte_unused,
		enum rte_eth_event_type event __rte_unused,
		void *cb_arg, void *out __rte_unused)
{
	struct sub_device *sdev = cb_arg;

	/* Switch the tx_dev away from this slave as soon as possible. */
	fs_switch_dev(sdev->fs_dev, sdev);
	/* Use safe bursts in any case. */
	set_burst_fn(sdev->fs_dev, 1);
	/*
	 * Async removal, the sub-PMD will try to unregister
	 * the callback at the source of the current thread context.
	 */
	sdev->remove = 1;
	return 0;
}

View File

@ -34,6 +34,7 @@
#include <stdint.h>
#include <rte_debug.h>
#include <rte_atomic.h>
#include <rte_ethdev.h>
#include <rte_malloc.h>
#include <rte_flow.h>
@ -204,9 +205,21 @@ fs_dev_configure(struct rte_eth_dev *dev)
}
}
FOREACH_SUBDEV(sdev, i, dev) {
int rmv_interrupt = 0;
if (sdev->state != DEV_PROBED)
continue;
rmv_interrupt = ETH(sdev)->data->dev_flags &
RTE_ETH_DEV_INTR_RMV;
if (rmv_interrupt) {
DEBUG("Enabling RMV interrupts for sub_device %d", i);
dev->data->dev_conf.intr_conf.rmv = 1;
} else {
DEBUG("sub_device %d does not support RMV event", i);
}
DEBUG("Configuring sub-device %d", i);
sdev->remove = 0;
ret = rte_eth_dev_configure(PORT_ID(sdev),
dev->data->nb_rx_queues,
dev->data->nb_tx_queues,
@ -215,6 +228,16 @@ fs_dev_configure(struct rte_eth_dev *dev)
ERROR("Could not configure sub_device %d", i);
return ret;
}
if (rmv_interrupt) {
ret = rte_eth_dev_callback_register(PORT_ID(sdev),
RTE_ETH_EVENT_INTR_RMV,
failsafe_eth_rmv_event_callback,
sdev);
if (ret)
WARN("Failed to register RMV callback for sub_device %d",
SUB_ID(sdev));
}
dev->data->dev_conf.intr_conf.rmv = 0;
sdev->state = DEV_ACTIVE;
}
if (PRIV(dev)->state < DEV_ACTIVE)
@ -240,7 +263,7 @@ fs_dev_start(struct rte_eth_dev *dev)
}
if (PRIV(dev)->state < DEV_STARTED)
PRIV(dev)->state = DEV_STARTED;
fs_switch_dev(dev);
fs_switch_dev(dev, NULL);
return 0;
}
@ -351,10 +374,14 @@ fs_rx_queue_setup(struct rte_eth_dev *dev,
fs_rx_queue_release(rxq);
dev->data->rx_queues[rx_queue_id] = NULL;
}
rxq = rte_zmalloc(NULL, sizeof(*rxq),
rxq = rte_zmalloc(NULL,
sizeof(*rxq) +
sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
RTE_CACHE_LINE_SIZE);
if (rxq == NULL)
return -ENOMEM;
FOREACH_SUBDEV(sdev, i, dev)
rte_atomic64_init(&rxq->refcnt[i]);
rxq->qid = rx_queue_id;
rxq->socket_id = socket_id;
rxq->info.mp = mb_pool;
@ -414,10 +441,14 @@ fs_tx_queue_setup(struct rte_eth_dev *dev,
fs_tx_queue_release(txq);
dev->data->tx_queues[tx_queue_id] = NULL;
}
txq = rte_zmalloc("ethdev TX queue", sizeof(*txq),
txq = rte_zmalloc("ethdev TX queue",
sizeof(*txq) +
sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
RTE_CACHE_LINE_SIZE);
if (txq == NULL)
return -ENOMEM;
FOREACH_SUBDEV(sdev, i, dev)
rte_atomic64_init(&txq->refcnt[i]);
txq->qid = tx_queue_id;
txq->socket_id = socket_id;
txq->info.conf = *tx_conf;

View File

@ -36,6 +36,7 @@
#include <sys/queue.h>
#include <rte_atomic.h>
#include <rte_dev.h>
#include <rte_ethdev.h>
#include <rte_devargs.h>
@ -65,6 +66,7 @@ struct rxq {
uint8_t last_polled;
unsigned int socket_id;
struct rte_eth_rxq_info info;
rte_atomic64_t refcnt[];
};
struct txq {
@ -72,6 +74,7 @@ struct txq {
uint16_t qid;
unsigned int socket_id;
struct rte_eth_txq_info info;
rte_atomic64_t refcnt[];
};
struct rte_flow {
@ -101,6 +104,10 @@ struct sub_device {
enum dev_state state;
/* Some device are defined as a command line */
char *cmdline;
/* fail-safe device backreference */
struct rte_eth_dev *fs_dev;
/* flag calling for recollection */
volatile unsigned int remove:1;
};
struct fs_priv {
@ -168,6 +175,10 @@ int failsafe_eal_uninit(struct rte_eth_dev *dev);
/* ETH_DEV */
int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
void failsafe_dev_remove(struct rte_eth_dev *dev);
int failsafe_eth_rmv_event_callback(uint8_t port_id,
enum rte_eth_event_type type,
void *arg, void *out);
/* GLOBALS */
@ -233,6 +244,39 @@ extern int mac_from_arg;
#define SUBOPS(s, ops) \
(ETH(s)->dev_ops->ops)
/**
 * Atomic guard
 *
 * Per-slave, per-queue counters taken around each burst function call.
 * A slave flagged for removal is only detached once every one of its
 * guards reads zero (see fs_rxtx_clean()), i.e. no datapath thread is
 * still executing a burst on it.
 */
/**
 * Enter the guard: take a reference before calling a slave burst.
 * a: (rte_atomic64_t)
 */
#define FS_ATOMIC_P(a) \
	rte_atomic64_add(&(a), 1)
/**
 * Leave the guard: drop the reference after the burst returns.
 * a: (rte_atomic64_t)
 */
#define FS_ATOMIC_V(a) \
	rte_atomic64_sub(&(a), 1)
/**
 * Read the RX guard of slave s for queue i; non-zero means the queue
 * is currently in use by an RX burst.
 * s: (struct sub_device *)
 * i: uint16_t qid
 */
#define FS_ATOMIC_RX(s, i) \
	rte_atomic64_read( \
	 &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid] \
	)
/**
 * Read the TX guard of slave s for queue i; non-zero means the queue
 * is currently in use by a TX burst.
 * s: (struct sub_device *)
 * i: uint16_t qid
 */
#define FS_ATOMIC_TX(s, i) \
	rte_atomic64_read( \
	 &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid] \
	)
#define LOG__(level, m, ...) \
RTE_LOG(level, PMD, "net_failsafe: " m "%c", __VA_ARGS__)
#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
@ -257,33 +301,45 @@ fs_find_next(struct rte_eth_dev *dev, uint8_t sid,
return sid;
}
/*
* Switch emitting device.
* If banned is set, banned must not be considered for
* the role of emitting device.
*/
static inline void
fs_switch_dev(struct rte_eth_dev *dev)
fs_switch_dev(struct rte_eth_dev *dev,
struct sub_device *banned)
{
struct sub_device *txd;
enum dev_state req_state;
req_state = PRIV(dev)->state;
if (PREFERRED_SUBDEV(dev)->state >= req_state) {
if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) &&
(TX_SUBDEV(dev) == NULL ||
txd = TX_SUBDEV(dev);
if (PREFERRED_SUBDEV(dev)->state >= req_state &&
PREFERRED_SUBDEV(dev) != banned) {
if (txd != PREFERRED_SUBDEV(dev) &&
(txd == NULL ||
(req_state == DEV_STARTED) ||
(TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) {
(txd && txd->state < DEV_STARTED))) {
DEBUG("Switching tx_dev to preferred sub_device");
PRIV(dev)->subs_tx = 0;
}
} else if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) ||
TX_SUBDEV(dev) == NULL) {
} else if ((txd && txd->state < req_state) ||
txd == NULL ||
txd == banned) {
struct sub_device *sdev;
uint8_t i;
/* Using acceptable device */
FOREACH_SUBDEV_STATE(sdev, i, dev, req_state) {
if (sdev == banned)
continue;
DEBUG("Switching tx_dev to sub_device %d",
i);
PRIV(dev)->subs_tx = i;
break;
}
} else if (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) {
} else if (txd && txd->state < req_state) {
DEBUG("No device ready, deactivating tx_dev");
PRIV(dev)->subs_tx = PRIV(dev)->subs_tail;
} else {

View File

@ -31,6 +31,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <rte_atomic.h>
#include <rte_debug.h>
#include <rte_mbuf.h>
#include <rte_ethdev.h>
@ -112,8 +113,10 @@ failsafe_rx_burst(void *queue,
if (unlikely(fs_rx_unsafe(sdev)))
continue;
sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
FS_ATOMIC_P(rxq->refcnt[sdev->sid]);
nb_rx = ETH(sdev)->
rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
FS_ATOMIC_V(rxq->refcnt[sdev->sid]);
if (nb_rx) {
rxq->last_polled = i;
return nb_rx;
@ -146,8 +149,10 @@ failsafe_rx_burst_fast(void *queue,
sdev = &priv->subs[i];
RTE_ASSERT(!fs_rx_unsafe(sdev));
sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
FS_ATOMIC_P(rxq->refcnt[sdev->sid]);
nb_rx = ETH(sdev)->
rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
FS_ATOMIC_V(rxq->refcnt[sdev->sid]);
if (nb_rx) {
rxq->last_polled = i;
return nb_rx;
@ -164,13 +169,17 @@ failsafe_tx_burst(void *queue,
struct sub_device *sdev;
struct txq *txq;
void *sub_txq;
uint16_t nb_tx;
txq = queue;
sdev = TX_SUBDEV(txq->priv->dev);
if (unlikely(fs_tx_unsafe(sdev)))
return 0;
sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
FS_ATOMIC_P(txq->refcnt[sdev->sid]);
nb_tx = ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
FS_ATOMIC_V(txq->refcnt[sdev->sid]);
return nb_tx;
}
uint16_t
@ -181,10 +190,14 @@ failsafe_tx_burst_fast(void *queue,
struct sub_device *sdev;
struct txq *txq;
void *sub_txq;
uint16_t nb_tx;
txq = queue;
sdev = TX_SUBDEV(txq->priv->dev);
RTE_ASSERT(!fs_tx_unsafe(sdev));
sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
FS_ATOMIC_P(txq->refcnt[sdev->sid]);
nb_tx = ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
FS_ATOMIC_V(txq->refcnt[sdev->sid]);
return nb_tx;
}