cxgbe(4): Add support for NIC suspend/resume and live reset.

Add suspend/resume callbacks to the driver and a live reset built around
them.  This commit covers the basic NIC, and future commits will expand
this functionality to other stateful parts of the chip.  Suspend and
resume operate on the chip (the t?nex nexus device) and affect all its
ports.  It is not possible to suspend/resume or reset individual ports.
All these operations can be performed on a running NIC.  A reset will
look like a link bounce to the networking stack.

Here are some ways to exercise this functionality:

 /* Manual suspend and resume. */
 # devctl suspend t6nex0
 # devctl resume t6nex0

 /* Manual reset. */
 # devctl reset t6nex0

 /* Manual reset with driver sysctl. */
 # sysctl dev.t6nex.0.reset=1

 /* Automatic adapter reset on any fatal error. */
 # sysctl hw.cxgbe.reset_on_fatal_err=1
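
To keep the fatal-error behavior across reboots, the same knob can be set
from sysctl.conf (one way to do it; adjust to local convention):

 /* Automatic reset on fatal errors, persistent across reboots. */
 # echo 'hw.cxgbe.reset_on_fatal_err=1' >> /etc/sysctl.conf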

Suspend disables the adapter (DMA, interrupts, and the port PHYs) and
marks the hardware as unavailable to the driver.  All ifnets associated
with the adapter are still visible to the kernel but operations that
require hardware interaction will fail with ENXIO.  All ifnets report
link-down while the adapter is suspended.
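
The ENXIO behavior follows one pattern throughout the diff below: enter a
synchronized_op, then check hw_off_limits() before touching the chip.  A
minimal sketch of that shape (the function name and the wait-string are
made up for illustration; this is not code from the commit):

static int
example_hw_op(struct adapter *sc)
{
	int rc;

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4exmp");
	if (rc)
		return (rc);
	if (hw_off_limits(sc)) {
		rc = ENXIO;	/* adapter is suspended; refuse to touch HW */
		goto done;
	}
	/* ... register and firmware accesses go here ... */
done:
	end_synchronized_op(sc, 0);
	return (rc);
}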

Resume will reattach to the card, reconfigure it as before, and recreate
the queues servicing the existing ifnets.  The ifnets are able to send
and receive traffic as soon as the link comes back up.
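
For example, after a resume the links renegotiate on their own (assuming
the T6's ports attach as cc(4) ifnets; the interface name below is just an
example):

 /* Resume and watch the first port's link recover. */
 # devctl resume t6nex0
 # ifconfig cc0 | grep status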

Reset is roughly the same as a suspend and a resume with at least one of
these events in between: D0->D3Hot->D0, FLR, PCIe link retrain.
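
As a rough sketch of one such event, here is what a PCIe function-level
reset looks like with FreeBSD's pcie_flr(9).  This is a simplified
illustration, not the driver's reset path; real callers also quiesce the
device first, and the timeout math follows existing users of the API:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <dev/pci/pcivar.h>

static int
example_flr(device_t dev)
{
	u_int timeout;

	/* Config space does not survive an FLR; save it first. */
	pci_save_state(dev);

	/* Max completion timeout is in us; pcie_flr() takes ms. */
	timeout = max(pcie_get_max_completion_timeout(dev) / 1000, 10);
	if (!pcie_flr(dev, timeout, true))
		return (ENOTSUP);	/* device has no FLR capability */

	pci_restore_state(dev);
	return (0);
}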

MFC after:	1 month
Relnotes:	yes
Sponsored by:	Chelsio Communications

Author:		Navdeep Parhar
Date:		2021-04-27 21:33:10 -07:00
Commit:		83b5cda106 (parent f33f2365ee)
Stats:		6 files changed, 1393 insertions(+), 306 deletions(-)

sys/dev/cxgbe/adapter.h:

@@ -165,6 +165,7 @@ enum {
 	IS_VF		= (1 << 7),
 	KERN_TLS_ON	= (1 << 8),	/* HW is configured for KERN_TLS */
 	CXGBE_BUSY	= (1 << 9),
+	HW_OFF_LIMITS	= (1 << 10),	/* off limits to all except reset_thread */
 
 	/* port flags */
 	HAS_TRACEQ	= (1 << 3),

@@ -955,13 +956,26 @@ struct adapter {
 	TAILQ_HEAD(, sge_fl) sfl;
 	struct callout sfl_callout;
 
-	struct mtx reg_lock;	/* for indirect register access */
+	/*
+	 * Driver code that can run when the adapter is suspended must use this
+	 * lock or a synchronized_op and check for HW_OFF_LIMITS before
+	 * accessing hardware.
+	 *
+	 * XXX: could be changed to rwlock.  wlock in suspend/resume and for
+	 * indirect register access, rlock everywhere else.
+	 */
+	struct mtx reg_lock;
 
 	struct memwin memwin[NUM_MEMWIN];	/* memory windows */
 
 	struct mtx tc_lock;
 	struct task tc_task;
 
+	struct task reset_task;
+	const void *reset_thread;
+	int num_resets;
+	int incarnation;
+
 	const char *last_op;
 	const void *last_op_thr;
 	int last_op_flags;

@@ -1051,24 +1065,34 @@ forwarding_intr_to_fwq(struct adapter *sc)
 	return (sc->intr_count == 1);
 }
 
+/* Works reliably inside a sync_op or with reg_lock held. */
+static inline bool
+hw_off_limits(struct adapter *sc)
+{
+	return (__predict_false(sc->flags & HW_OFF_LIMITS));
+}
+
 static inline uint32_t
 t4_read_reg(struct adapter *sc, uint32_t reg)
 {
+	if (hw_off_limits(sc))
+		MPASS(curthread == sc->reset_thread);
 	return bus_space_read_4(sc->bt, sc->bh, reg);
 }
 
 static inline void
 t4_write_reg(struct adapter *sc, uint32_t reg, uint32_t val)
 {
+	if (hw_off_limits(sc))
+		MPASS(curthread == sc->reset_thread);
 	bus_space_write_4(sc->bt, sc->bh, reg, val);
 }
 
 static inline uint64_t
 t4_read_reg64(struct adapter *sc, uint32_t reg)
 {
+	if (hw_off_limits(sc))
+		MPASS(curthread == sc->reset_thread);
 #ifdef __LP64__
 	return bus_space_read_8(sc->bt, sc->bh, reg);
 #else

@@ -1081,7 +1105,8 @@ t4_read_reg64(struct adapter *sc, uint32_t reg)
 static inline void
 t4_write_reg64(struct adapter *sc, uint32_t reg, uint64_t val)
 {
+	if (hw_off_limits(sc))
+		MPASS(curthread == sc->reset_thread);
 #ifdef __LP64__
 	bus_space_write_8(sc->bt, sc->bh, reg, val);
 #else

@@ -1093,14 +1118,16 @@ t4_write_reg64(struct adapter *sc, uint32_t reg, uint64_t val)
 static inline void
 t4_os_pci_read_cfg1(struct adapter *sc, int reg, uint8_t *val)
 {
+	if (hw_off_limits(sc))
+		MPASS(curthread == sc->reset_thread);
 	*val = pci_read_config(sc->dev, reg, 1);
 }
 
 static inline void
 t4_os_pci_write_cfg1(struct adapter *sc, int reg, uint8_t val)
 {
+	if (hw_off_limits(sc))
+		MPASS(curthread == sc->reset_thread);
 	pci_write_config(sc->dev, reg, val, 1);
 }

@@ -1108,27 +1135,32 @@ static inline void
 t4_os_pci_read_cfg2(struct adapter *sc, int reg, uint16_t *val)
 {
+	if (hw_off_limits(sc))
+		MPASS(curthread == sc->reset_thread);
 	*val = pci_read_config(sc->dev, reg, 2);
 }
 
 static inline void
 t4_os_pci_write_cfg2(struct adapter *sc, int reg, uint16_t val)
 {
+	if (hw_off_limits(sc))
+		MPASS(curthread == sc->reset_thread);
 	pci_write_config(sc->dev, reg, val, 2);
 }
 
 static inline void
 t4_os_pci_read_cfg4(struct adapter *sc, int reg, uint32_t *val)
 {
+	if (hw_off_limits(sc))
+		MPASS(curthread == sc->reset_thread);
 	*val = pci_read_config(sc->dev, reg, 4);
 }
 
 static inline void
 t4_os_pci_write_cfg4(struct adapter *sc, int reg, uint32_t val)
 {
+	if (hw_off_limits(sc))
+		MPASS(curthread == sc->reset_thread);
 	pci_write_config(sc->dev, reg, val, 4);
 }
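
The reg_lock comment above boils down to a pattern like the one in
get_filter_hits() further down; here is a distilled sketch (a hypothetical
helper, not part of the commit):

static uint32_t
example_locked_read(struct adapter *sc, uint32_t reg)
{
	uint32_t v;

	mtx_lock(&sc->reg_lock);
	if (hw_off_limits(sc))
		v = 0;		/* suspended: report a harmless default */
	else
		v = t4_read_reg(sc, reg);
	mtx_unlock(&sc->reg_lock);

	return (v);
}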

sys/dev/cxgbe/t4_clip.c:

@@ -171,7 +171,7 @@ update_clip(struct adapter *sc, void *arg __unused)
 	if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4clip"))
 		return;
 
-	if (mtx_initialized(&sc->clip_table_lock))
+	if (mtx_initialized(&sc->clip_table_lock) && !hw_off_limits(sc))
 		update_clip_table(sc);
 
 	end_synchronized_op(sc, LOCK_HELD);

sys/dev/cxgbe/t4_filter.c:

@@ -522,6 +522,11 @@ set_filter_mode(struct adapter *sc, uint32_t mode)
 	if (rc)
 		return (rc);
 
+	if (hw_off_limits(sc)) {
+		rc = ENXIO;
+		goto done;
+	}
+
 	if (sc->tids.ftids_in_use > 0 ||	/* TCAM filters active */
 	    sc->tids.hpftids_in_use > 0 ||	/* hi-pri TCAM filters active */
 	    sc->tids.tids_in_use > 0) {		/* TOE or hashfilters active */

@@ -568,6 +573,11 @@ set_filter_mask(struct adapter *sc, uint32_t mode)
 	if (rc)
 		return (rc);
 
+	if (hw_off_limits(sc)) {
+		rc = ENXIO;
+		goto done;
+	}
+
 	if (sc->tids.tids_in_use > 0) {		/* TOE or hashfilters active */
 		rc = EBUSY;
 		goto done;

@@ -589,20 +599,27 @@ static inline uint64_t
 get_filter_hits(struct adapter *sc, uint32_t tid)
 {
 	uint32_t tcb_addr;
+	uint64_t hits;
 
 	tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + tid * TCB_SIZE;
 
-	if (is_t4(sc)) {
-		uint64_t hits;
+	mtx_lock(&sc->reg_lock);
+	if (hw_off_limits(sc))
+		hits = 0;
+	else if (is_t4(sc)) {
+		uint64_t t;
 
-		read_via_memwin(sc, 0, tcb_addr + 16, (uint32_t *)&hits, 8);
-		return (be64toh(hits));
+		read_via_memwin(sc, 0, tcb_addr + 16, (uint32_t *)&t, 8);
+		hits = be64toh(t);
 	} else {
-		uint32_t hits;
+		uint32_t t;
 
-		read_via_memwin(sc, 0, tcb_addr + 24, &hits, 4);
-		return (be32toh(hits));
+		read_via_memwin(sc, 0, tcb_addr + 24, &t, 4);
+		hits = be32toh(t);
 	}
+	mtx_unlock(&sc->reg_lock);
+
+	return (hits);
 }

@@ -961,6 +978,11 @@ set_filter(struct adapter *sc, struct t4_filter *t)
 	if (rc)
 		return (rc);
 
+	if (hw_off_limits(sc)) {
+		rc = ENXIO;
+		goto done;
+	}
+
 	if (!(sc->flags & FULL_INIT_DONE) && ((rc = adapter_init(sc)) != 0))
 		goto done;

sys/dev/cxgbe/t4_main.c: diff suppressed because it is too large (this file
carries the bulk of the new suspend/resume/reset code).

sys/dev/cxgbe/t4_sched.c:

@@ -63,7 +63,10 @@ set_sched_class_config(struct adapter *sc, int minmax)
 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4sscc");
 	if (rc)
 		return (rc);
-	rc = -t4_sched_config(sc, FW_SCHED_TYPE_PKTSCHED, minmax, 1);
+	if (hw_off_limits(sc))
+		rc = ENXIO;
+	else
+		rc = -t4_sched_config(sc, FW_SCHED_TYPE_PKTSCHED, minmax, 1);
 	end_synchronized_op(sc, 0);
 
 	return (rc);

@@ -209,9 +212,11 @@ set_sched_class_params(struct adapter *sc, struct t4_sched_class_params *p,
 		}
 		return (rc);
 	}
-	rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED, fw_level, fw_mode,
-	    fw_rateunit, fw_ratemode, p->channel, p->cl, p->minrate, p->maxrate,
-	    p->weight, p->pktsize, 0, sleep_ok);
+	if (!hw_off_limits(sc)) {
+		rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED, fw_level,
+		    fw_mode, fw_rateunit, fw_ratemode, p->channel, p->cl,
+		    p->minrate, p->maxrate, p->weight, p->pktsize, 0, sleep_ok);
+	}
 	end_synchronized_op(sc, sleep_ok ? 0 : LOCK_HELD);
 
 	if (p->level == SCHED_CLASS_LEVEL_CL_RL) {

sys/dev/cxgbe/t4_tracer.c:

@@ -289,6 +289,11 @@ t4_get_tracer(struct adapter *sc, struct t4_tracer *t)
 	if (rc)
 		return (rc);
 
+	if (hw_off_limits(sc)) {
+		rc = ENXIO;
+		goto done;
+	}
+
 	for (i = t->idx; i < NTRACE; i++) {
 		if (isset(&sc->tracer_valid, t->idx)) {
 			t4_get_trace_filter(sc, &tp, i, &enabled);

@@ -338,6 +343,11 @@ t4_set_tracer(struct adapter *sc, struct t4_tracer *t)
 	if (rc)
 		return (rc);
 
+	if (hw_off_limits(sc)) {
+		rc = ENXIO;
+		goto done;
+	}
+
 	/*
 	 * If no tracing filter is specified this time then check if the filter
 	 * at the index is valid anyway because it was set previously.  If so