By default, always escalate to controller reset when an I/O times out.
While aborts are typically cleaner than a full controller reset, many times an I/O timeout indicates other controller-level issues where aborts may not work. NVMe drivers for other operating systems are also defaulting to controller reset rather than aborts for timed out I/O. Sponsored by: Intel Reviewed by: carl
This commit is contained in:
parent
a0dc59709c
commit
48ce317898
@ -422,12 +422,8 @@ nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr)
|
||||
void
|
||||
nvme_ctrlr_reset(struct nvme_controller *ctrlr)
|
||||
{
|
||||
int status;
|
||||
|
||||
status = nvme_ctrlr_hw_reset(ctrlr);
|
||||
DELAY(100*1000);
|
||||
if (status == 0)
|
||||
taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->restart_task);
|
||||
taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->reset_task);
|
||||
}
|
||||
|
||||
static int
|
||||
@ -686,11 +682,24 @@ nvme_ctrlr_start(void *ctrlr_arg)
|
||||
}
|
||||
|
||||
static void
|
||||
nvme_ctrlr_restart_task(void *arg, int pending)
|
||||
nvme_ctrlr_reset_task(void *arg, int pending)
|
||||
{
|
||||
struct nvme_controller *ctrlr = arg;
|
||||
struct nvme_controller *ctrlr = arg;
|
||||
int status;
|
||||
|
||||
nvme_ctrlr_start(ctrlr);
|
||||
device_printf(ctrlr->dev, "resetting controller");
|
||||
status = nvme_ctrlr_hw_reset(ctrlr);
|
||||
/*
|
||||
* Use pause instead of DELAY, so that we yield to any nvme interrupt
|
||||
* handlers on this CPU that were blocked on a qpair lock. We want
|
||||
* all nvme interrupts completed before proceeding with restarting the
|
||||
* controller.
|
||||
*
|
||||
* XXX - any way to guarantee the interrupt handlers have quiesced?
|
||||
*/
|
||||
pause("nvmereset", hz / 10);
|
||||
if (status == 0)
|
||||
nvme_ctrlr_start(ctrlr);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -841,6 +850,9 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
|
||||
ctrlr->force_intx = 0;
|
||||
TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
|
||||
|
||||
ctrlr->enable_aborts = 0;
|
||||
TUNABLE_INT_FETCH("hw.nvme.enable_aborts", &ctrlr->enable_aborts);
|
||||
|
||||
ctrlr->msix_enabled = 1;
|
||||
|
||||
if (ctrlr->force_intx) {
|
||||
@ -879,7 +891,7 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
|
||||
|
||||
ctrlr->cdev->si_drv1 = (void *)ctrlr;
|
||||
|
||||
TASK_INIT(&ctrlr->restart_task, 0, nvme_ctrlr_restart_task, ctrlr);
|
||||
TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr);
|
||||
ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK,
|
||||
taskqueue_thread_enqueue, &ctrlr->taskqueue);
|
||||
taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_DISK, "nvme taskq");
|
||||
|
@ -230,6 +230,7 @@ struct nvme_controller {
|
||||
|
||||
uint32_t msix_enabled;
|
||||
uint32_t force_intx;
|
||||
uint32_t enable_aborts;
|
||||
|
||||
uint32_t num_io_queues;
|
||||
boolean_t per_cpu_io_queues;
|
||||
@ -239,7 +240,7 @@ struct nvme_controller {
|
||||
uint32_t ns_identified;
|
||||
uint32_t queues_created;
|
||||
uint32_t num_start_attempts;
|
||||
struct task restart_task;
|
||||
struct task reset_task;
|
||||
struct taskqueue *taskqueue;
|
||||
|
||||
/* For shared legacy interrupt. */
|
||||
|
@ -460,21 +460,20 @@ nvme_timeout(void *arg)
|
||||
struct nvme_controller *ctrlr = qpair->ctrlr;
|
||||
union csts_register csts;
|
||||
|
||||
/* Read csts to get value of cfs - controller fatal status. */
|
||||
csts.raw = nvme_mmio_read_4(ctrlr, csts);
|
||||
if (csts.bits.cfs == 1) {
|
||||
/*
|
||||
* The controller is reporting fatal status. Don't bother
|
||||
* trying to abort the timed out command - proceed
|
||||
* immediately to a controller-level reset.
|
||||
*/
|
||||
device_printf(ctrlr->dev,
|
||||
"controller reports fatal status, resetting...\n");
|
||||
nvme_ctrlr_reset(ctrlr);
|
||||
return;
|
||||
}
|
||||
device_printf(ctrlr->dev, "i/o timeout, csts.cfs=%d\n", csts.bits.cfs);
|
||||
nvme_dump_command(&tr->req->cmd);
|
||||
|
||||
nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
|
||||
nvme_abort_complete, tr);
|
||||
if (ctrlr->enable_aborts && csts.bits.cfs == 0) {
|
||||
/*
|
||||
* If aborts are enabled, only use them if the controller is
|
||||
* not reporting fatal status.
|
||||
*/
|
||||
nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
|
||||
nvme_abort_complete, tr);
|
||||
} else
|
||||
nvme_ctrlr_reset(ctrlr);
|
||||
}
|
||||
|
||||
void
|
||||
|
Loading…
Reference in New Issue
Block a user