Add handling for controller fatal status (csts.cfs).
On any I/O timeout, check for csts.cfs==1. If set, the controller is reporting fatal status and we reset the controller immediately, rather than trying to abort the timed-out command.

This changeset also includes deferring the controller start portion of the reset to a separate task. This ensures we are always performing a controller start operation from a consistent context.

Sponsored by: Intel
Reviewed by: carl
This commit is contained in:
parent
dbba74428b
commit
12d191ec12
@ -427,7 +427,7 @@ nvme_ctrlr_reset(struct nvme_controller *ctrlr)
|
||||
status = nvme_ctrlr_hw_reset(ctrlr);
|
||||
DELAY(100*1000);
|
||||
if (status == 0)
|
||||
nvme_ctrlr_start(ctrlr);
|
||||
taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->restart_task);
|
||||
}
|
||||
|
||||
static int
|
||||
@ -685,6 +685,14 @@ nvme_ctrlr_start(void *ctrlr_arg)
|
||||
ctrlr->num_start_attempts++;
|
||||
}
|
||||
|
||||
/*
 * Task handler registered for ctrlr->restart_task (see TASK_INIT in
 * nvme_ctrlr_construct).  nvme_ctrlr_reset() enqueues this task on the
 * controller's taskqueue instead of calling nvme_ctrlr_start() directly,
 * so the start step always runs from the taskqueue thread.
 *
 * arg     - the struct nvme_controller that was passed to TASK_INIT.
 * pending - supplied by the taskqueue framework; not used here.
 */
static void
|
||||
nvme_ctrlr_restart_task(void *arg, int pending)
|
||||
{
|
||||
struct nvme_controller *ctrlr = arg;
|
||||
|
||||
nvme_ctrlr_start(ctrlr);
|
||||
}
|
||||
|
||||
static void
|
||||
nvme_ctrlr_intx_handler(void *arg)
|
||||
{
|
||||
@ -864,6 +872,11 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
|
||||
|
||||
ctrlr->cdev->si_drv1 = (void *)ctrlr;
|
||||
|
||||
TASK_INIT(&ctrlr->restart_task, 0, nvme_ctrlr_restart_task, ctrlr);
|
||||
ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK,
|
||||
taskqueue_thread_enqueue, &ctrlr->taskqueue);
|
||||
taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_DISK, "nvme taskq");
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -872,6 +885,8 @@ nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev)
|
||||
{
|
||||
int i;
|
||||
|
||||
taskqueue_free(ctrlr->taskqueue);
|
||||
|
||||
for (i = 0; i < NVME_MAX_NAMESPACES; i++)
|
||||
nvme_ns_destruct(&ctrlr->ns[i]);
|
||||
|
||||
|
@ -36,6 +36,7 @@
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/rman.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/taskqueue.h>
|
||||
|
||||
#include <vm/uma.h>
|
||||
|
||||
@ -236,6 +237,8 @@ struct nvme_controller {
|
||||
uint32_t ns_identified;
|
||||
uint32_t queues_created;
|
||||
uint32_t num_start_attempts;
|
||||
struct task restart_task;
|
||||
struct taskqueue *taskqueue;
|
||||
|
||||
/* For shared legacy interrupt. */
|
||||
int rid;
|
||||
|
@ -98,7 +98,7 @@ nvme_qpair_construct_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
|
||||
bus_dmamap_load(qpair->dma_tag, tr->prp_dma_map, tr->prp,
|
||||
sizeof(tr->prp), nvme_single_map, &tr->prp_bus_addr, 0);
|
||||
|
||||
callout_init_mtx(&tr->timer, &qpair->lock, 0);
|
||||
callout_init(&tr->timer, 1);
|
||||
tr->cid = cid;
|
||||
tr->qpair = qpair;
|
||||
}
|
||||
@ -456,8 +456,24 @@ static void
|
||||
nvme_timeout(void *arg)
|
||||
{
|
||||
struct nvme_tracker *tr = arg;
|
||||
struct nvme_qpair *qpair = tr->qpair;
|
||||
struct nvme_controller *ctrlr = qpair->ctrlr;
|
||||
union csts_register csts;
|
||||
|
||||
nvme_ctrlr_cmd_abort(tr->qpair->ctrlr, tr->cid, tr->qpair->id,
|
||||
csts.raw = nvme_mmio_read_4(ctrlr, csts);
|
||||
if (csts.bits.cfs == 1) {
|
||||
/*
|
||||
* The controller is reporting fatal status. Don't bother
|
||||
* trying to abort the timed out command - proceed
|
||||
* immediately to a controller-level reset.
|
||||
*/
|
||||
device_printf(ctrlr->dev,
|
||||
"controller reports fatal status, resetting...\n");
|
||||
nvme_ctrlr_reset(ctrlr);
|
||||
return;
|
||||
}
|
||||
|
||||
nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
|
||||
nvme_abort_complete, tr);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user