ena: start timer service on attach

The timer service was started when the interface was brought up and it
was stopped when it was brought down. Since ena_up requires the device
to be responsive, triggering the reset would become impossible if the
device became unresponsive with the interface down.

Since most of the functions in timer service already perform the check
to see if the device is running, this only requires starting the callout
in attach and stopping it when bringing the interface up or down to
avoid race between different admin queue calls.

Since callout functions for timer service are always called with the
same arguments, replace callout_{init,reset,drain} calls with
ENA_TIMER_{INIT,RESET,DRAIN} macros.

Submitted by: Dawid Gorecki <dgr@semihalf.com>
Obtained from: Semihalf
MFC after: 2 weeks
Sponsored by: Amazon, Inc.
This commit is contained in:
Dawid Gorecki 2022-01-03 14:50:13 +01:00 committed by Marcin Wojtas
parent b168d0c850
commit 78554d0c70
2 changed files with 45 additions and 27 deletions

View File

@ -2101,6 +2101,13 @@ ena_up(struct ena_adapter *adapter)
ena_log(adapter->pdev, INFO, "device is going UP\n");
/*
* ena_timer_service can use functions, which write to the admin queue.
* Those calls are not protected by ENA_LOCK, and because of that, the
* timer should be stopped when bringing the device up or down.
*/
ENA_TIMER_DRAIN(adapter);
/* setup interrupts for IO queues */
rc = ena_setup_io_intr(adapter);
if (unlikely(rc != 0)) {
@ -2143,19 +2150,12 @@ ena_up(struct ena_adapter *adapter)
if_setdrvflagbits(adapter->ifp, IFF_DRV_RUNNING,
IFF_DRV_OACTIVE);
/* Activate timer service only if the device is running.
* If this flag is not set, it means that the driver is being
* reset and timer service will be activated afterwards.
*/
if (ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)) {
callout_reset_sbt(&adapter->timer_service, SBT_1S,
SBT_1S, ena_timer_service, (void *)adapter, 0);
}
ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP, adapter);
ena_unmask_all_io_irqs(adapter);
ENA_TIMER_RESET(adapter);
return (0);
err_up_complete:
@ -2165,6 +2165,8 @@ ena_up(struct ena_adapter *adapter)
err_create_queues_with_backoff:
ena_free_io_irq(adapter);
error:
ENA_TIMER_RESET(adapter);
return (rc);
}
@ -2469,9 +2471,10 @@ ena_down(struct ena_adapter *adapter)
if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
return;
ena_log(adapter->pdev, INFO, "device is going DOWN\n");
/* Drain timer service to avoid admin queue race condition. */
ENA_TIMER_DRAIN(adapter);
callout_drain(&adapter->timer_service);
ena_log(adapter->pdev, INFO, "device is going DOWN\n");
ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP, adapter);
if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE,
@ -2495,6 +2498,8 @@ ena_down(struct ena_adapter *adapter)
ena_free_all_rx_resources(adapter);
counter_u64_add(adapter->dev_stats.interface_down, 1);
ENA_TIMER_RESET(adapter);
}
static uint32_t
@ -3249,8 +3254,10 @@ ena_timer_service(void *data)
adapter->eni_metrics_sample_interval)) {
/*
* There is no race with other admin queue calls, as:
* - Timer service runs after interface is up, so all
* - Timer service runs after attach function ends, so all
* configuration calls to the admin queue are finished.
* - Timer service is temporarily stopped when bringing
* the interface up or down.
* - After interface is up, the driver doesn't use (at least
* for now) other functions writing to the admin queue.
*
@ -3279,7 +3286,7 @@ ena_timer_service(void *data)
/*
* Schedule another timeout one second from now.
*/
callout_schedule_sbt(&adapter->timer_service, SBT_1S, SBT_1S, 0);
ENA_TIMER_RESET(adapter);
}
void
@ -3294,7 +3301,7 @@ ena_destroy_device(struct ena_adapter *adapter, bool graceful)
if_link_state_change(ifp, LINK_STATE_DOWN);
callout_drain(&adapter->timer_service);
ENA_TIMER_DRAIN(adapter);
dev_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
if (dev_up)
@ -3425,17 +3432,15 @@ ena_restore_device(struct ena_adapter *adapter)
/* Indicate that device is running again and ready to work */
ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter)) {
/*
* As the AENQ handlers weren't executed during reset because
* the flag ENA_FLAG_DEVICE_RUNNING was turned off, the
* timestamp must be updated again That will prevent next reset
* caused by missing keep alive.
*/
adapter->keep_alive_timestamp = getsbinuptime();
callout_reset_sbt(&adapter->timer_service, SBT_1S, SBT_1S,
ena_timer_service, (void *)adapter, 0);
}
/*
* As the AENQ handlers weren't executed during reset because
* the flag ENA_FLAG_DEVICE_RUNNING was turned off, the
* timestamp must be updated again That will prevent next reset
* caused by missing keep alive.
*/
adapter->keep_alive_timestamp = getsbinuptime();
ENA_TIMER_RESET(adapter);
ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
ena_log(dev, INFO,
@ -3457,6 +3462,8 @@ ena_restore_device(struct ena_adapter *adapter)
ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
ena_log(dev, ERR, "Reset attempt failed. Can not reset the device\n");
ENA_TIMER_RESET(adapter);
return (rc);
}
@ -3504,7 +3511,7 @@ ena_attach(device_t pdev)
* Set up the timer service - driver is responsible for avoiding
* concurrency, as the callout won't be using any locking inside.
*/
callout_init(&adapter->timer_service, true);
ENA_TIMER_INIT(adapter);
adapter->keep_alive_timeout = DEFAULT_KEEP_ALIVE_TO;
adapter->missing_tx_timeout = DEFAULT_TX_CMP_TO;
adapter->missing_tx_max_queues = DEFAULT_TX_MONITORED_QUEUES;
@ -3689,6 +3696,9 @@ ena_attach(device_t pdev)
if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
/* Run the timer service */
ENA_TIMER_RESET(adapter);
return (0);
#ifdef DEV_NETMAP
@ -3742,7 +3752,7 @@ ena_detach(device_t pdev)
/* Stop timer service */
ENA_LOCK_LOCK();
callout_drain(&adapter->timer_service);
ENA_TIMER_DRAIN(adapter);
ENA_LOCK_UNLOCK();
/* Release reset task */

View File

@ -499,6 +499,14 @@ struct ena_adapter {
#define ENA_LOCK_UNLOCK() sx_unlock(&ena_global_lock)
#define ENA_LOCK_ASSERT() sx_assert(&ena_global_lock, SA_XLOCKED)
#define ENA_TIMER_INIT(_adapter) \
callout_init(&(_adapter)->timer_service, true)
#define ENA_TIMER_DRAIN(_adapter) \
callout_drain(&(_adapter)->timer_service)
#define ENA_TIMER_RESET(_adapter) \
callout_reset_sbt(&(_adapter)->timer_service, SBT_1S, SBT_1S, \
ena_timer_service, (void*)(_adapter), 0)
#define clamp_t(type, _x, min, max) min_t(type, max_t(type, _x, min), max)
#define clamp_val(val, lo, hi) clamp_t(__typeof(val), val, lo, hi)