Since a fatal trap can happen at arbitrary times, don't panic when the
completions are not in a consistent state. Cope with the different
places the normal I/O completion polling thread can be interrupted and
then re-entered during a kernel panic + dump.

Reviewed by: jhb and markj (both prior versions)
Differential Revision:  https://reviews.freebsd.org/D20478
Warner Losh 2019-06-01 15:37:44 +00:00
parent eafaa1bc35
commit d0aaeffdb4
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=348495
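
For orientation: the diff below numbers a three-step completion-queue wrap sequence as 1, 2, and 3. The following is a minimal sketch of that sequence and of the panic-time resynchronization this commit adds. The names (struct cq, cq_advance, cq_resync_after_panic, entry_phase) are simplified stand-ins, not the driver's real identifiers, and C11 atomics stand in for FreeBSD's atomic_store_rel_int().

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

struct cq {
	_Atomic uint32_t head;   /* next completion entry to consume */
	uint32_t num_entries;    /* queue depth */
	bool phase;              /* phase bit expected in valid entries */
	const bool *entry_phase; /* phase bit as written in each entry */
};

/* Normal-path advance: the three steps a panic can interrupt between. */
static void
cq_advance(struct cq *q)
{
	if (++q->head == q->num_entries) {		/* 1 */
		atomic_store_explicit(&q->head, 0,
		    memory_order_release);		/* 2 */
		q->phase = !q->phase;			/* 3 */
	}
}

/* Panic-path resync: finish whichever of steps 2 and 3 were cut off. */
static void
cq_resync_after_panic(struct cq *q)
{
	uint32_t head = atomic_load_explicit(&q->head, memory_order_acquire);

	if (head == q->num_entries) {
		/* Stopped between 1 and 2, so 3 cannot have run: do both. */
		atomic_store_explicit(&q->head, 0, memory_order_release);
		q->phase = !q->phase;
	} else if (head == 0) {
		/*
		 * Stopped after 2; 3 may or may not have run. The last
		 * entry still carries the pre-wrap phase bit, so the
		 * correct expected phase is its opposite.
		 */
		q->phase = !q->entry_phase[q->num_entries - 1];
	}
}

Given the commit's premise that the panic machinery flushes pending memory writes, the release-ordered store at 2 is the marker the resync keys off: head == num_entries means 2 never ran, while head == 0 means 2 ran but 3 is in doubt.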


@@ -31,6 +31,8 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/proc.h>
#include <dev/pci/pcivar.h>
@@ -308,7 +310,7 @@ get_status_string(uint16_t sct, uint16_t sc)
}
static void
nvme_qpair_print_completion(struct nvme_qpair *qpair,
struct nvme_completion *cpl)
{
uint16_t sct, sc;
@@ -479,18 +481,51 @@ nvme_qpair_process_completions(struct nvme_qpair *qpair)
struct nvme_tracker *tr;
struct nvme_completion cpl;
int done = 0;
bool in_panic = dumping || SCHEDULER_STOPPED();
qpair->num_intr_handler_calls++;
/*
* qpair is not enabled, likely because a controller reset is in
* progress. Ignore the interrupt - any I/O that was associated with
* this interrupt will get retried when the reset is complete.
*/
if (!qpair->is_enabled)
return (false);
/*
* A panic can stop the CPU this routine is running on at any point. If
we're called during a panic, complete the cq_head wrap protocol for
* the case where we are interrupted just after the increment at 1
* below, but before we can reset cq_head to zero at 2. Also cope with
* the case where we do the zero at 2, but may or may not have done the
* phase adjustment at step 3. The panic machinery flushes all pending
* memory writes, so we can make these strong ordering assumptions
* that would otherwise be unwise if we were racing in real time.
*/
if (__predict_false(in_panic)) {
if (qpair->cq_head == qpair->num_entries) {
/*
* cq_head was incremented at 1 but never reset at 2: the
* atomic_store_rel hasn't run, so the phase flip at 3 can't
* have happened either. Finish both steps here.
*/
qpair->cq_head = 0;
qpair->phase = !qpair->phase;
} else if (qpair->cq_head == 0) {
/*
* In this case, we know that the assignment at 2 below
* happened, but we don't know whether step 3 did. To find
* out, we look at the last completion entry and set the
* phase to the opposite of the phase bit it carries. This
* gets us back in sync.
*/
cpl = qpair->cpl[qpair->num_entries - 1];
nvme_completion_swapbytes(&cpl);
qpair->phase = !NVME_STATUS_GET_P(cpl.status);
}
}
bus_dmamap_sync(qpair->dma_tag, qpair->queuemem_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
while (1) {
@@ -508,17 +543,35 @@ nvme_qpair_process_completions(struct nvme_qpair *qpair)
nvme_qpair_complete_tracker(qpair, tr, &cpl, ERROR_PRINT_ALL);
qpair->sq_head = cpl.sqhd;
done++;
} else if (!in_panic) {
/*
* A missing tracker is normally an error. However, a
* panic can stop the CPU this routine is running on
* after completing an I/O but before updating
* qpair->cq_head at 1 below. Later, we re-enter this
* routine to poll I/O associated with the kernel
* dump. The tracker for this entry was set to NULL when
* the first pass completed the I/O (or the completion
* itself triggered the panic), but '1' below never ran,
* so cq_head still points at the entry. Rather than
* panic again, ignore this condition because it's not
* unexpected.
*/
nvme_printf(qpair->ctrlr,
"cpl does not map to outstanding cmd\n");
/* nvme_dump_completion expects device endianness */
nvme_dump_completion(&qpair->cpl[qpair->cq_head]);
KASSERT(0, ("received completion for unknown cmd\n"));
KASSERT(0, ("received completion for unknown cmd"));
}
/*
* There are a number of races with the following (see above) when
* the system panics. We compensate for each of them by using the
* atomic store to force strong ordering (at least when viewed in
* the aftermath of a panic).
*/
if (++qpair->cq_head == qpair->num_entries) { /* 1 */
atomic_store_rel_int(&qpair->cq_head, 0); /* 2 */
qpair->phase = !qpair->phase; /* 3 */
}
nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl,
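
The hunk cuts off before the loop's exit test, which is why the phase resynchronization above matters: the driver keeps consuming entries only while each entry's phase bit matches qpair->phase, so a stale phase would make the poller stop early or re-process old completions. A sketch of that consumer loop, reusing the hypothetical struct cq and cq_advance() above (tracker completion elided):

/* Consumer-loop sketch (assumed shape, not the driver's code). */
static int
cq_drain(struct cq *q)
{
	int done = 0;
	uint32_t head;

	for (;;) {
		head = atomic_load_explicit(&q->head, memory_order_relaxed);
		if (q->entry_phase[head] != q->phase)
			break;	/* device has not written this entry yet */
		/* ... look up and complete the tracker for 'head' ... */
		cq_advance(q);
		done++;
	}
	return (done);
}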