Raise the watchdog timer interval to 2 ticks, thereby guaranteeing

that it fires between 1ms and 2ms.
Treat two consecutive occurrences of Heartbeat failures as a legitimate
Heartbeat failure.

MFC after: 3 days
This commit is contained in:
David C Somayajulu 2017-07-19 19:08:37 +00:00
parent eb5ea8788f
commit 12e46bad2c
4 changed files with 14 additions and 5 deletions

View File

@ -105,7 +105,7 @@ struct qla_ivec {
typedef struct qla_ivec qla_ivec_t;
#define QLA_WATCHDOG_CALLOUT_TICKS 1
#define QLA_WATCHDOG_CALLOUT_TICKS 2
typedef struct _qla_tx_ring {
qla_tx_buf_t tx_buf[NUM_TX_DESCRIPTORS];

View File

@ -3366,7 +3366,7 @@ ql_hw_check_health(qla_host_t *ha)
ha->hw.health_count++;
if (ha->hw.health_count < 1000)
if (ha->hw.health_count < 500)
return 0;
ha->hw.health_count = 0;
@ -3385,10 +3385,18 @@ ql_hw_check_health(qla_host_t *ha)
if ((val != ha->hw.hbeat_value) &&
(!(QL_ERR_INJECT(ha, INJCT_HEARTBEAT_FAILURE)))) {
ha->hw.hbeat_value = val;
ha->hw.hbeat_failure = 0;
return 0;
}
device_printf(ha->pci_dev, "%s: Heartbeat Failue [0x%08x]\n",
__func__, val);
ha->hw.hbeat_failure++;
if (ha->hw.hbeat_failure < 2) /* we ignore the first failure */
return 0;
else
device_printf(ha->pci_dev, "%s: Heartbeat Failue [0x%08x]\n",
__func__, val);
return -1;
}

View File

@ -1671,6 +1671,7 @@ typedef struct _qla_hw {
/* heart beat register value */
uint32_t hbeat_value;
uint32_t health_count;
uint32_t hbeat_failure;
uint32_t max_tx_segs;
uint32_t min_lro_pkt_size;

View File

@ -276,7 +276,7 @@ qla_watchdog(void *arg)
ha->qla_watchdog_paused = 1;
}
ha->watchdog_ticks = ha->watchdog_ticks++ % 1000;
ha->watchdog_ticks = ha->watchdog_ticks++ % 500;
callout_reset(&ha->tx_callout, QLA_WATCHDOG_CALLOUT_TICKS,
qla_watchdog, ha);
}