Add handling for controller fatal status (csts.cfs).

On any I/O timeout, check for csts.cfs==1.  If set, the controller
is reporting fatal status, so we reset the controller immediately
rather than trying to abort the timed-out command.

This changeset also defers the controller start portion of the reset
to a separate task.  This ensures that the controller start operation
is always performed from a consistent context.

Sponsored by:	Intel
Reviewed by:	carl
This commit is contained in:
Jim Harris 2013-03-26 19:58:17 +00:00
parent dbba74428b
commit 12d191ec12
3 changed files with 37 additions and 3 deletions

View File

@ -427,7 +427,7 @@ nvme_ctrlr_reset(struct nvme_controller *ctrlr)
status = nvme_ctrlr_hw_reset(ctrlr);
DELAY(100*1000);
if (status == 0)
nvme_ctrlr_start(ctrlr);
taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->restart_task);
}
static int
@ -685,6 +685,14 @@ nvme_ctrlr_start(void *ctrlr_arg)
ctrlr->num_start_attempts++;
}
/*
 * Taskqueue handler for the controller's restart_task: runs the
 * controller start sequence by calling nvme_ctrlr_start().
 *
 * arg is the struct nvme_controller pointer that was supplied when the
 * task was set up with TASK_INIT(); pending is not used by this handler.
 */
static void
nvme_ctrlr_restart_task(void *arg, int pending)
{
struct nvme_controller *ctrlr = arg;
nvme_ctrlr_start(ctrlr);
}
static void
nvme_ctrlr_intx_handler(void *arg)
{
@ -864,6 +872,11 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
ctrlr->cdev->si_drv1 = (void *)ctrlr;
TASK_INIT(&ctrlr->restart_task, 0, nvme_ctrlr_restart_task, ctrlr);
ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK,
taskqueue_thread_enqueue, &ctrlr->taskqueue);
taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_DISK, "nvme taskq");
return (0);
}
@ -872,6 +885,8 @@ nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev)
{
int i;
taskqueue_free(ctrlr->taskqueue);
for (i = 0; i < NVME_MAX_NAMESPACES; i++)
nvme_ns_destruct(&ctrlr->ns[i]);

View File

@ -36,6 +36,7 @@
#include <sys/mutex.h>
#include <sys/rman.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <vm/uma.h>
@ -236,6 +237,8 @@ struct nvme_controller {
uint32_t ns_identified;
uint32_t queues_created;
uint32_t num_start_attempts;
struct task restart_task;
struct taskqueue *taskqueue;
/* For shared legacy interrupt. */
int rid;

View File

@ -98,7 +98,7 @@ nvme_qpair_construct_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
bus_dmamap_load(qpair->dma_tag, tr->prp_dma_map, tr->prp,
sizeof(tr->prp), nvme_single_map, &tr->prp_bus_addr, 0);
callout_init_mtx(&tr->timer, &qpair->lock, 0);
callout_init(&tr->timer, 1);
tr->cid = cid;
tr->qpair = qpair;
}
@ -456,8 +456,24 @@ static void
nvme_timeout(void *arg)
{
struct nvme_tracker *tr = arg;
struct nvme_qpair *qpair = tr->qpair;
struct nvme_controller *ctrlr = qpair->ctrlr;
union csts_register csts;
nvme_ctrlr_cmd_abort(tr->qpair->ctrlr, tr->cid, tr->qpair->id,
csts.raw = nvme_mmio_read_4(ctrlr, csts);
if (csts.bits.cfs == 1) {
/*
* The controller is reporting fatal status. Don't bother
* trying to abort the timed out command - proceed
* immediately to a controller-level reset.
*/
device_printf(ctrlr->dev,
"controller reports fatal status, resetting...\n");
nvme_ctrlr_reset(ctrlr);
return;
}
nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
nvme_abort_complete, tr);
}