From 48ce31789834ace5279a97d2f18ea247c9389da4 Mon Sep 17 00:00:00 2001
From: Jim Harris <jimharris@FreeBSD.org>
Date: Tue, 26 Mar 2013 20:32:57 +0000
Subject: [PATCH] By default, always escalate to controller reset when an I/O
 times out.

While aborts are typically cleaner than a full controller reset, many times
an I/O timeout indicates other controller-level issues where aborts may not
work.  NVMe drivers for other operating systems are also defaulting to
controller reset rather than aborts for timed out I/O.

Sponsored by:	Intel
Reviewed by:	carl
---
 sys/dev/nvme/nvme_ctrlr.c   | 30 +++++++++++++++++++++---------
 sys/dev/nvme/nvme_private.h |  3 ++-
 sys/dev/nvme/nvme_qpair.c   | 25 ++++++++++++-------------
 3 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index 6328134798da..cf6889e92472 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -422,12 +422,8 @@ nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr)
 void
 nvme_ctrlr_reset(struct nvme_controller *ctrlr)
 {
-	int status;
 
-	status = nvme_ctrlr_hw_reset(ctrlr);
-	DELAY(100*1000);
-	if (status == 0)
-		taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->restart_task);
+	taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->reset_task);
 }
 
 static int
@@ -686,11 +682,24 @@ err:
 }
 
 static void
-nvme_ctrlr_restart_task(void *arg, int pending)
+nvme_ctrlr_reset_task(void *arg, int pending)
 {
-	struct nvme_controller *ctrlr = arg;
+	struct nvme_controller	*ctrlr = arg;
+	int			status;
 
-	nvme_ctrlr_start(ctrlr);
+	device_printf(ctrlr->dev, "resetting controller");
+	status = nvme_ctrlr_hw_reset(ctrlr);
+	/*
+	 * Use pause instead of DELAY, so that we yield to any nvme interrupt
+	 *  handlers on this CPU that were blocked on a qpair lock. We want
+	 *  all nvme interrupts completed before proceeding with restarting the
+	 *  controller.
+	 *
+	 * XXX - any way to guarantee the interrupt handlers have quiesced?
+	 */
+	pause("nvmereset", hz / 10);
+	if (status == 0)
+		nvme_ctrlr_start(ctrlr);
 }
 
 static void
@@ -841,6 +850,9 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
 	ctrlr->force_intx = 0;
 	TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
 
+	ctrlr->enable_aborts = 0;
+	TUNABLE_INT_FETCH("hw.nvme.enable_aborts", &ctrlr->enable_aborts);
+
 	ctrlr->msix_enabled = 1;
 
 	if (ctrlr->force_intx) {
@@ -879,7 +891,7 @@ intx:
 
 	ctrlr->cdev->si_drv1 = (void *)ctrlr;
 
-	TASK_INIT(&ctrlr->restart_task, 0, nvme_ctrlr_restart_task, ctrlr);
+	TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr);
 	ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK,
 	    taskqueue_thread_enqueue, &ctrlr->taskqueue);
 	taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_DISK, "nvme taskq");
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
index 89217102ebb4..bae9aaab6776 100644
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -230,6 +230,7 @@ struct nvme_controller {
 
 	uint32_t		msix_enabled;
 	uint32_t		force_intx;
+	uint32_t		enable_aborts;
 
 	uint32_t		num_io_queues;
 	boolean_t		per_cpu_io_queues;
@@ -239,7 +240,7 @@ struct nvme_controller {
 	uint32_t		ns_identified;
 	uint32_t		queues_created;
 	uint32_t		num_start_attempts;
-	struct task		restart_task;
+	struct task		reset_task;
 	struct taskqueue	*taskqueue;
 
 	/* For shared legacy interrupt. */
diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c
index 9783db9115f0..602171f44a28 100644
--- a/sys/dev/nvme/nvme_qpair.c
+++ b/sys/dev/nvme/nvme_qpair.c
@@ -460,21 +460,20 @@ nvme_timeout(void *arg)
 	struct nvme_controller	*ctrlr = qpair->ctrlr;
 	union csts_register	csts;
 
+	/* Read csts to get value of cfs - controller fatal status. */
 	csts.raw = nvme_mmio_read_4(ctrlr, csts);
-	if (csts.bits.cfs == 1) {
-		/*
-		 * The controller is reporting fatal status.  Don't bother
-		 *  trying to abort the timed out command - proceed
-		 *  immediately to a controller-level reset.
-		 */
-		device_printf(ctrlr->dev,
-		    "controller reports fatal status, resetting...\n");
-		nvme_ctrlr_reset(ctrlr);
-		return;
-	}
+	device_printf(ctrlr->dev, "i/o timeout, csts.cfs=%d\n", csts.bits.cfs);
+	nvme_dump_command(&tr->req->cmd);
 
-	nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
-	    nvme_abort_complete, tr);
+	if (ctrlr->enable_aborts && csts.bits.cfs == 0) {
+		/*
+		 * If aborts are enabled, only use them if the controller is
+		 *  not reporting fatal status.
+		 */
+		nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
+		    nvme_abort_complete, tr);
+	} else
+		nvme_ctrlr_reset(ctrlr);
 }
 
 void