cxgbe(4): Improved error reporting and diagnostics.

"slow" interrupt handler:
- Expand the list of INT_CAUSE registers known to the driver.
- Add decode information for many more bits but decouple it from the
  rest of intr_info so that it is entirely optional.
- Call t4_fatal_err exactly once, and from the top level PL intr handler.

t4_fatal_err:
- Use t4_shutdown_adapter from the common code to stop the adapter.
- Stop servicing slow interrupts after the first fatal one.

Driver/firmware interaction:
- CH_DUMP_MBOX: note whether the mailbox being dumped is a command or a
  reply or something else.
- Log the raw value of pcie_fw for some errors.
- Use correct log levels (debug vs. error).

Sponsored by:	Chelsio Communications
This commit is contained in:
np 2019-02-01 20:42:49 +00:00
parent ab08890a66
commit 63376d5ae2
5 changed files with 1279 additions and 710 deletions

View File

@ -155,7 +155,7 @@ enum {
CHK_MBOX_ACCESS = (1 << 2),
MASTER_PF = (1 << 3),
ADAP_SYSCTL_CTX = (1 << 4),
/* TOM_INIT_DONE= (1 << 5), No longer used */
ADAP_ERR = (1 << 5),
BUF_PACKING_OK = (1 << 6),
IS_VF = (1 << 7),
@ -175,6 +175,7 @@ enum {
DF_LOAD_FW_ANYTIME = (1 << 1), /* Allow LOAD_FW after init */
DF_DISABLE_TCB_CACHE = (1 << 2), /* Disable TCB cache (T6+) */
DF_DISABLE_CFG_RETRY = (1 << 3), /* Disable fallback config */
DF_VERBOSE_SLOWINTR = (1 << 4), /* Chatty slow intr handler */
};
#define IS_DOOMED(vi) ((vi)->flags & DOOMED)
@ -932,24 +933,6 @@ struct adapter {
#define TXQ_LOCK_ASSERT_OWNED(txq) EQ_LOCK_ASSERT_OWNED(&(txq)->eq)
#define TXQ_LOCK_ASSERT_NOTOWNED(txq) EQ_LOCK_ASSERT_NOTOWNED(&(txq)->eq)
#define CH_DUMP_MBOX(sc, mbox, data_reg) \
do { \
if (sc->debug_flags & DF_DUMP_MBOX) { \
log(LOG_NOTICE, \
"%s mbox %u: %016llx %016llx %016llx %016llx " \
"%016llx %016llx %016llx %016llx\n", \
device_get_nameunit(sc->dev), mbox, \
(unsigned long long)t4_read_reg64(sc, data_reg), \
(unsigned long long)t4_read_reg64(sc, data_reg + 8), \
(unsigned long long)t4_read_reg64(sc, data_reg + 16), \
(unsigned long long)t4_read_reg64(sc, data_reg + 24), \
(unsigned long long)t4_read_reg64(sc, data_reg + 32), \
(unsigned long long)t4_read_reg64(sc, data_reg + 40), \
(unsigned long long)t4_read_reg64(sc, data_reg + 48), \
(unsigned long long)t4_read_reg64(sc, data_reg + 56)); \
} \
} while (0)
#define for_each_txq(vi, iter, q) \
for (q = &vi->pi->adapter->sge.txq[vi->first_txq], iter = 0; \
iter < vi->ntxq; ++iter, ++q)
@ -1105,6 +1088,38 @@ t4_use_ldst(struct adapter *sc)
#endif
}
/*
 * Log the eight 64-bit words of a mailbox.
 *
 * Nothing is logged unless DF_DUMP_MBOX is set in sc->debug_flags or the
 * caller flags the dump as an error.  Error dumps go out at LOG_ERR, all
 * others at LOG_DEBUG.
 *
 * sc   - adapter; supplies the device name and debug flags.
 * mbox - mailbox number, printed as an unsigned value.
 * reg  - offset of the first mailbox data register.  Only used when p is
 *        NULL, in which case the eight words are read from reg, reg + 8,
 *        ..., reg + 56 with t4_read_reg64().
 * msg  - free-form tag inserted after the mailbox number.
 * p    - if not NULL, eight big-endian 64-bit words to dump instead of
 *        reading the registers.
 * err  - true forces the dump and raises the log level to LOG_ERR.
 */
static inline void
CH_DUMP_MBOX(struct adapter *sc, int mbox, const int reg,
    const char *msg, const __be64 *const p, const bool err)
{
	/* Eight 64-bit words per dump, matching the eight %016llx fields. */
	enum { MBOX_DUMP_WORDS = 8 };
	unsigned long long w[MBOX_DUMP_WORDS];
	int i;

	if (!(sc->debug_flags & DF_DUMP_MBOX) && !err)
		return;

	/*
	 * Gather the words first so that a single log() call (and a single
	 * format string) serves both the in-memory and the register case.
	 * %llx requires unsigned long long, hence the type of w[].
	 */
	for (i = 0; i < MBOX_DUMP_WORDS; i++) {
		if (p != NULL)
			w[i] = (unsigned long long)be64_to_cpu(p[i]);
		else
			w[i] = (unsigned long long)t4_read_reg64(sc,
			    reg + i * 8);
	}

	log(err ? LOG_ERR : LOG_DEBUG,
	    "%s: mbox %u %s %016llx %016llx %016llx %016llx "
	    "%016llx %016llx %016llx %016llx\n",
	    device_get_nameunit(sc->dev), (unsigned int)mbox, msg,
	    w[0], w[1], w[2], w[3], w[4], w[5], w[6], w[7]);
}
/* t4_main.c */
extern int t4_ntxq;
extern int t4_nrxq;

View File

@ -34,10 +34,6 @@
#include "t4_hw.h"
#define GLBL_INTR_MASK (F_CIM | F_MPS | F_PL | F_PCIE | F_MC0 | F_EDC0 | \
F_EDC1 | F_LE | F_TP | F_MA | F_PM_TX | F_PM_RX | F_ULP_RX | \
F_CPL_SWITCH | F_SGE | F_ULP_TX)
enum {
MAX_NPORTS = 4, /* max # of ports */
SERNUM_LEN = 24, /* Serial # length */
@ -581,7 +577,7 @@ struct fw_filter_wr;
void t4_intr_enable(struct adapter *adapter);
void t4_intr_disable(struct adapter *adapter);
void t4_intr_clear(struct adapter *adapter);
int t4_slow_intr_handler(struct adapter *adapter);
int t4_slow_intr_handler(struct adapter *adapter, bool verbose);
int t4_hash_mac_addr(const u8 *addr);
int t4_link_l1cfg(struct adapter *adap, unsigned int mbox, unsigned int port,
@ -621,9 +617,7 @@ int t4_init_sge_params(struct adapter *adapter);
int t4_init_tp_params(struct adapter *adap, bool sleep_ok);
int t4_filter_field_shift(const struct adapter *adap, int filter_sel);
int t4_port_init(struct adapter *adap, int mbox, int pf, int vf, int port_id);
void t4_fatal_err(struct adapter *adapter);
void t4_db_full(struct adapter *adapter);
void t4_db_dropped(struct adapter *adapter);
void t4_fatal_err(struct adapter *adapter, bool fw_error);
int t4_set_trace_filter(struct adapter *adapter, const struct trace_params *tp,
int filter_index, int enable);
void t4_get_trace_filter(struct adapter *adapter, struct trace_params *tp,

File diff suppressed because it is too large Load Diff

View File

@ -1077,6 +1077,7 @@ t4_attach(device_t dev)
rc = partition_resources(sc);
if (rc != 0)
goto done; /* error message displayed already */
t4_intr_clear(sc);
}
rc = get_params__post_init(sc);
@ -2563,14 +2564,23 @@ vcxgbe_detach(device_t dev)
}
void
t4_fatal_err(struct adapter *sc)
t4_fatal_err(struct adapter *sc, bool fw_error)
{
t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0);
t4_intr_disable(sc);
log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n",
t4_shutdown_adapter(sc);
log(LOG_ALERT, "%s: encountered fatal error, adapter stopped.\n",
device_get_nameunit(sc->dev));
if (t4_panic_on_fatal_err)
panic("panic requested on fatal error");
if (fw_error) {
ASSERT_SYNCHRONIZED_OP(sc);
sc->flags |= ADAP_ERR;
} else {
ADAPTER_LOCK(sc);
sc->flags |= ADAP_ERR;
ADAPTER_UNLOCK(sc);
}
}
void
@ -10069,20 +10079,6 @@ t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
return (rc);
}
void
t4_db_full(struct adapter *sc)
{
CXGBE_UNIMPLEMENTED(__func__);
}
void
t4_db_dropped(struct adapter *sc)
{
CXGBE_UNIMPLEMENTED(__func__);
}
#ifdef TCP_OFFLOAD
static int
toe_capability(struct vi_info *vi, int enable)

View File

@ -1394,8 +1394,12 @@ void
t4_intr_err(void *arg)
{
struct adapter *sc = arg;
const bool verbose = (sc->debug_flags & DF_VERBOSE_SLOWINTR) != 0;
t4_slow_intr_handler(sc);
if (sc->flags & ADAP_ERR)
return;
t4_slow_intr_handler(sc, verbose);
}
/*