From 8fe7bf064fe2bbde5d71e0e7d6711c6638147775 Mon Sep 17 00:00:00 2001
From: Warner Losh <imp@FreeBSD.org>
Date: Mon, 8 Jul 2019 20:20:01 +0000
Subject: [PATCH] Fix bugs in recovery path and improve cm tracking

Eliminate the TIMEDOUT state. This state really conveyed two different
concepts: I timed out during recovery (and my command got put on the
recovery queue), and I timed out diring discovery (which doesn't).
Separate those two concepts into two flags. Use the TIMEDOUT flag to
fail requests as timed out. Use the on queue flag to remove them from
the queue.

In mps_intr_locked for MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY message
type, when completing commands, ignore the ones that are not in state
INQUEUE. They were already completed as part of the recovery
process. When we complete them twice, we wind up with entries on the
free queue that are marked as busy, trigging asserts.

Reviewed by: scottl (earlier version, just for mpr)
Differential Revision: https://reviews.freebsd.org/D20785
---
 sys/dev/mpr/mpr.c         | 17 +++++++++++------
 sys/dev/mpr/mpr_sas.c     | 12 +++++++-----
 sys/dev/mpr/mpr_sas_lsi.c |  6 ++++--
 sys/dev/mpr/mprvar.h      |  5 +++--
 sys/dev/mps/mps.c         | 19 +++++++++++++++----
 sys/dev/mps/mps_sas.c     | 12 +++++++-----
 sys/dev/mps/mps_sas_lsi.c |  6 ++++--
 sys/dev/mps/mpsvar.h      | 15 +++++++++------
 8 files changed, 60 insertions(+), 32 deletions(-)

diff --git a/sys/dev/mpr/mpr.c b/sys/dev/mpr/mpr.c
index 8741777333c1..d36a1148dc98 100644
--- a/sys/dev/mpr/mpr.c
+++ b/sys/dev/mpr/mpr.c
@@ -2617,12 +2617,17 @@ mpr_intr_locked(void *data)
 			} else {
 				cm = &sc->commands[
 				    le16toh(desc->AddressReply.SMID)];
-				if (cm->cm_state != MPR_CM_STATE_TIMEDOUT)
-					cm->cm_state = MPR_CM_STATE_BUSY;
-				cm->cm_reply = reply;
-				cm->cm_reply_data =
-				    le32toh(desc->AddressReply.
-				    ReplyFrameAddress);
+				if (cm->cm_state == MPR_CM_STATE_INQUEUE) {
+					cm->cm_reply = reply;
+					cm->cm_reply_data =
+					    le32toh(desc->AddressReply.
+						ReplyFrameAddress);
+				} else {
+					mpr_dprint(sc, MPR_RECOVERY,
+					    "Bad state for ADDRESS_REPLY status,"
+					    " ignoring state %d cm %p\n",
+					    cm->cm_state, cm);
+				}
 			}
 			break;
 		}
diff --git a/sys/dev/mpr/mpr_sas.c b/sys/dev/mpr/mpr_sas.c
index a292b38ed9af..534db5bd617e 100644
--- a/sys/dev/mpr/mpr_sas.c
+++ b/sys/dev/mpr/mpr_sas.c
@@ -1692,7 +1692,7 @@ mprsas_scsiio_timeout(void *data)
 	 * and been re-used, though this is unlikely.
 	 */
 	mpr_intr_locked(sc);
-	if (cm->cm_state != MPR_CM_STATE_INQUEUE) {
+	if (cm->cm_flags & MPR_CM_FLAGS_ON_RECOVERY) {
 		mprsas_log_command(cm, MPR_XINFO,
 		    "SCSI command %p almost timed out\n", cm);
 		return;
@@ -1721,7 +1721,7 @@ mprsas_scsiio_timeout(void *data)
 	 * operational.  if not, do a diag reset.
 	 */
 	mprsas_set_ccbstatus(cm->cm_ccb, CAM_CMD_TIMEOUT);
-	cm->cm_state = MPR_CM_STATE_TIMEDOUT;
+	cm->cm_flags |= MPR_CM_FLAGS_ON_RECOVERY | MPR_CM_FLAGS_TIMEDOUT;
 	TAILQ_INSERT_TAIL(&targ->timedout_commands, cm, cm_recovery);
 
 	if (targ->tm != NULL) {
@@ -2529,9 +2529,11 @@ mprsas_scsiio_complete(struct mpr_softc *sc, struct mpr_command *cm)
 	TAILQ_REMOVE(&cm->cm_targ->commands, cm, cm_link);
 	ccb->ccb_h.status &= ~(CAM_STATUS_MASK | CAM_SIM_QUEUED);
 
-	if (cm->cm_state == MPR_CM_STATE_TIMEDOUT) {
+	if (cm->cm_flags & MPR_CM_FLAGS_ON_RECOVERY) {
 		TAILQ_REMOVE(&cm->cm_targ->timedout_commands, cm, cm_recovery);
-		cm->cm_state = MPR_CM_STATE_BUSY;
+		KASSERT(cm->cm_state == MPR_CM_STATE_BUSY,
+		    ("Not busy for CM_FLAGS_TIMEDOUT: %d\n", cm->cm_state));
+		cm->cm_flags &= ~MPR_CM_FLAGS_ON_RECOVERY;
 		if (cm->cm_reply != NULL)
 			mprsas_log_command(cm, MPR_RECOVERY,
 			    "completed timedout cm %p ccb %p during recovery "
@@ -2817,7 +2819,7 @@ mprsas_scsiio_complete(struct mpr_softc *sc, struct mpr_command *cm)
 		 * retry counter), the only difference is what gets printed
 		 * on the console.
 		 */
-		if (cm->cm_state == MPR_CM_STATE_TIMEDOUT)
+		if (cm->cm_flags & MPR_CM_FLAGS_TIMEDOUT)
 			mprsas_set_ccbstatus(ccb, CAM_CMD_TIMEOUT);
 		else
 			mprsas_set_ccbstatus(ccb, CAM_REQ_ABORTED);
diff --git a/sys/dev/mpr/mpr_sas_lsi.c b/sys/dev/mpr/mpr_sas_lsi.c
index 3d6c0b75f2e9..5e7da7d2e096 100644
--- a/sys/dev/mpr/mpr_sas_lsi.c
+++ b/sys/dev/mpr/mpr_sas_lsi.c
@@ -1017,7 +1017,7 @@ mprsas_add_device(struct mpr_softc *sc, u16 handle, u8 linkrate)
 		cm = &sc->commands[i];
 		if (cm->cm_flags & MPR_CM_FLAGS_SATA_ID_TIMEOUT) {
 			targ->timeouts++;
-			cm->cm_state = MPR_CM_STATE_TIMEDOUT;
+			cm->cm_flags |= MPR_CM_FLAGS_TIMEDOUT;
 
 			if ((targ->tm = mprsas_alloc_tm(sc)) != NULL) {
 				mpr_dprint(sc, MPR_INFO, "%s: sending Target "
@@ -1244,9 +1244,11 @@ mprsas_ata_id_timeout(struct mpr_softc *sc, struct mpr_command *cm)
 	/*
 	 * The Abort Task cannot be sent from here because the driver has not
 	 * completed setting up targets.  Instead, the command is flagged so
-	 * that special handling will be used to send the abort.
+	 * that special handling will be used to send the abort. Now that
+	 * this command has timed out, it's no longer in the queue.
 	 */
 	cm->cm_flags |= MPR_CM_FLAGS_SATA_ID_TIMEOUT;
+	cm->cm_state = MPR_CM_STATE_BUSY;
 }
 
 static int
diff --git a/sys/dev/mpr/mprvar.h b/sys/dev/mpr/mprvar.h
index 92d1dcfeddb0..8f1f7a385c72 100644
--- a/sys/dev/mpr/mprvar.h
+++ b/sys/dev/mpr/mprvar.h
@@ -275,11 +275,12 @@ struct mpr_command {
 #define	MPR_CM_FLAGS_ERROR_MASK		MPR_CM_FLAGS_CHAIN_FAILED
 #define	MPR_CM_FLAGS_USE_CCB		(1 << 9)
 #define	MPR_CM_FLAGS_SATA_ID_TIMEOUT	(1 << 10)
+#define MPR_CM_FLAGS_ON_RECOVERY	(1 << 12)
+#define MPR_CM_FLAGS_TIMEDOUT		(1 << 13)
 	u_int				cm_state;
 #define MPR_CM_STATE_FREE		0
 #define MPR_CM_STATE_BUSY		1
-#define MPR_CM_STATE_TIMEDOUT		2
-#define MPR_CM_STATE_INQUEUE		3
+#define MPR_CM_STATE_INQUEUE		2
 	bus_dmamap_t			cm_dmamap;
 	struct scsi_sense_data		*cm_sense;
 	uint64_t			*nvme_error_response;
diff --git a/sys/dev/mps/mps.c b/sys/dev/mps/mps.c
index 32b834f52b67..046beda137fc 100644
--- a/sys/dev/mps/mps.c
+++ b/sys/dev/mps/mps.c
@@ -2479,13 +2479,24 @@ mps_intr_locked(void *data)
 					    (MPI2_EVENT_NOTIFICATION_REPLY *)
 					    reply);
 			} else {
+				/*
+				 * Ignore commands not in INQUEUE state
+				 * since they've already been completed
+				 * via another path.
+				 */
 				cm = &sc->commands[
 				    le16toh(desc->AddressReply.SMID)];
-				if (cm->cm_state != MPS_CM_STATE_TIMEDOUT)
+				if (cm->cm_state == MPS_CM_STATE_INQUEUE) {
 					cm->cm_state = MPS_CM_STATE_BUSY;
-				cm->cm_reply = reply;
-				cm->cm_reply_data = le32toh(
-				    desc->AddressReply.ReplyFrameAddress);
+					cm->cm_reply = reply;
+					cm->cm_reply_data = le32toh(
+					    desc->AddressReply.ReplyFrameAddress);
+				} else {
+					mps_dprint(sc, MPS_RECOVERY,
+					    "Bad state for ADDRESS_REPLY status,"
+					    " ignoring state %d cm %p\n",
+					    cm->cm_state, cm);
+				}
 			}
 			break;
 		}
diff --git a/sys/dev/mps/mps_sas.c b/sys/dev/mps/mps_sas.c
index f35559ea9c52..8d8bac133cfa 100644
--- a/sys/dev/mps/mps_sas.c
+++ b/sys/dev/mps/mps_sas.c
@@ -1602,7 +1602,7 @@ mpssas_scsiio_timeout(void *data)
 	 * and been re-used, though this is unlikely.
 	 */
 	mps_intr_locked(sc);
-	if (cm->cm_state != MPS_CM_STATE_INQUEUE) {
+	if (cm->cm_flags & MPS_CM_FLAGS_ON_RECOVERY) {
 		mpssas_log_command(cm, MPS_XINFO,
 		    "SCSI command %p almost timed out\n", cm);
 		return;
@@ -1626,7 +1626,7 @@ mpssas_scsiio_timeout(void *data)
 	 * operational.  if not, do a diag reset.
 	 */
 	mpssas_set_ccbstatus(cm->cm_ccb, CAM_CMD_TIMEOUT);
-	cm->cm_state = MPS_CM_STATE_TIMEDOUT;
+	cm->cm_flags |= MPS_CM_FLAGS_ON_RECOVERY | MPS_CM_FLAGS_TIMEDOUT;
 	TAILQ_INSERT_TAIL(&targ->timedout_commands, cm, cm_recovery);
 
 	if (targ->tm != NULL) {
@@ -2040,9 +2040,11 @@ mpssas_scsiio_complete(struct mps_softc *sc, struct mps_command *cm)
 		biotrack(ccb->csio.bio, __func__);
 #endif
 
-	if (cm->cm_state == MPS_CM_STATE_TIMEDOUT) {
+	if (cm->cm_flags & MPS_CM_FLAGS_ON_RECOVERY) {
 		TAILQ_REMOVE(&cm->cm_targ->timedout_commands, cm, cm_recovery);
-		cm->cm_state = MPS_CM_STATE_BUSY;
+		KASSERT(cm->cm_state == MPS_CM_STATE_BUSY,
+		    ("Not busy for CM_FLAGS_TIMEDOUT: %d\n", cm->cm_state));
+		cm->cm_flags &= ~MPS_CM_FLAGS_ON_RECOVERY;
 		if (cm->cm_reply != NULL)
 			mpssas_log_command(cm, MPS_RECOVERY,
 			    "completed timedout cm %p ccb %p during recovery "
@@ -2325,7 +2327,7 @@ mpssas_scsiio_complete(struct mps_softc *sc, struct mps_command *cm)
 		 * retry counter), the only difference is what gets printed
 		 * on the console.
 		 */
-		if (cm->cm_state == MPS_CM_STATE_TIMEDOUT)
+		if (cm->cm_flags & MPS_CM_FLAGS_TIMEDOUT)
 			mpssas_set_ccbstatus(ccb, CAM_CMD_TIMEOUT);
 		else
 			mpssas_set_ccbstatus(ccb, CAM_REQ_ABORTED);
diff --git a/sys/dev/mps/mps_sas_lsi.c b/sys/dev/mps/mps_sas_lsi.c
index 2f23b102e1cb..1362bd7cdd59 100644
--- a/sys/dev/mps/mps_sas_lsi.c
+++ b/sys/dev/mps/mps_sas_lsi.c
@@ -790,7 +790,7 @@ mpssas_add_device(struct mps_softc *sc, u16 handle, u8 linkrate){
 		cm = &sc->commands[i];
 		if (cm->cm_flags & MPS_CM_FLAGS_SATA_ID_TIMEOUT) {
 			targ->timeouts++;
-			cm->cm_state = MPS_CM_STATE_TIMEDOUT;
+			cm->cm_flags |= MPS_CM_FLAGS_TIMEDOUT;
 
 			if ((targ->tm = mpssas_alloc_tm(sc)) != NULL) {
 				mps_dprint(sc, MPS_INFO, "%s: sending Target "
@@ -1017,9 +1017,11 @@ mpssas_ata_id_timeout(struct mps_softc *sc, struct mps_command *cm)
 	/*
 	 * The Abort Task cannot be sent from here because the driver has not
 	 * completed setting up targets.  Instead, the command is flagged so
-	 * that special handling will be used to send the abort.
+	 * that special handling will be used to send the abort. Now that
+	 * this command has timed out, it's no longer in the queue.
 	 */
 	cm->cm_flags |= MPS_CM_FLAGS_SATA_ID_TIMEOUT;
+	cm->cm_state = MPS_CM_STATE_BUSY;
 }
 
 static int
diff --git a/sys/dev/mps/mpsvar.h b/sys/dev/mps/mpsvar.h
index 4a899bf31798..3193198c6cbd 100644
--- a/sys/dev/mps/mpsvar.h
+++ b/sys/dev/mps/mpsvar.h
@@ -242,11 +242,12 @@ struct mps_command {
 #define	MPS_CM_FLAGS_ERROR_MASK		MPS_CM_FLAGS_CHAIN_FAILED
 #define	MPS_CM_FLAGS_USE_CCB		(1 << 10)
 #define	MPS_CM_FLAGS_SATA_ID_TIMEOUT	(1 << 11)
+#define MPS_CM_FLAGS_ON_RECOVERY	(1 << 12)
+#define MPS_CM_FLAGS_TIMEDOUT		(1 << 13)
 	u_int				cm_state;
 #define MPS_CM_STATE_FREE		0
 #define MPS_CM_STATE_BUSY		1
-#define MPS_CM_STATE_TIMEDOUT		2
-#define MPS_CM_STATE_INQUEUE		3
+#define MPS_CM_STATE_INQUEUE		2
 	bus_dmamap_t			cm_dmamap;
 	struct scsi_sense_data		*cm_sense;
 	TAILQ_HEAD(, mps_chain)		cm_chain_list;
@@ -545,7 +546,8 @@ mps_free_command(struct mps_softc *sc, struct mps_command *cm)
 {
 	struct mps_chain *chain, *chain_temp;
 
-	KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, ("state not busy\n"));
+	KASSERT(cm->cm_state == MPS_CM_STATE_BUSY,
+	    ("state not busy: %d\n", cm->cm_state));
 
 	if (cm->cm_reply != NULL)
 		mps_free_reply(sc, cm->cm_reply_data);
@@ -581,7 +583,7 @@ mps_alloc_command(struct mps_softc *sc)
 		return (NULL);
 
 	KASSERT(cm->cm_state == MPS_CM_STATE_FREE,
-	    ("mps: Allocating busy command\n"));
+	    ("mps: Allocating busy command: %d\n", cm->cm_state));
 
 	TAILQ_REMOVE(&sc->req_list, cm, cm_link);
 	cm->cm_state = MPS_CM_STATE_BUSY;
@@ -594,7 +596,8 @@ mps_free_high_priority_command(struct mps_softc *sc, struct mps_command *cm)
 {
 	struct mps_chain *chain, *chain_temp;
 
-	KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, ("state not busy\n"));
+	KASSERT(cm->cm_state == MPS_CM_STATE_BUSY,
+	    ("state not busy: %d\n", cm->cm_state));
 
 	if (cm->cm_reply != NULL)
 		mps_free_reply(sc, cm->cm_reply_data);
@@ -623,7 +626,7 @@ mps_alloc_high_priority_command(struct mps_softc *sc)
 		return (NULL);
 
 	KASSERT(cm->cm_state == MPS_CM_STATE_FREE,
-	    ("mps: Allocating busy command\n"));
+	    ("mps: Allocating high priority busy command: %d\n", cm->cm_state));
 
 	TAILQ_REMOVE(&sc->high_priority_req_list, cm, cm_link);
 	cm->cm_state = MPS_CM_STATE_BUSY;