Improve command lifecycle debugging and detection of problems.

Sponsored by:	Netflix
This commit is contained in:
Scott Long 2018-02-18 16:41:34 +00:00
parent 2fb9a51077
commit f0779b0452
6 changed files with 85 additions and 34 deletions

View File

@ -1135,6 +1135,9 @@ mpr_enqueue_request(struct mpr_softc *sc, struct mpr_command *cm)
if (++sc->io_cmds_active > sc->io_cmds_highwater)
sc->io_cmds_highwater++;
KASSERT(cm->cm_state == MPR_CM_STATE_BUSY, ("command not busy\n"));
cm->cm_state = MPR_CM_STATE_INQUEUE;
if (sc->atomic_desc_capable) {
rd.u.low = cm->cm_desc.Words.Low;
mpr_regwrite(sc, MPI26_ATOMIC_REQUEST_DESCRIPTOR_POST_OFFSET,
@ -1554,6 +1557,7 @@ mpr_alloc_requests(struct mpr_softc *sc)
cm->cm_sense_busaddr = sc->sense_busaddr + i * MPR_SENSE_LEN;
cm->cm_desc.Default.SMID = i;
cm->cm_sc = sc;
cm->cm_state = MPR_CM_STATE_BUSY;
TAILQ_INIT(&cm->cm_chain_list);
TAILQ_INIT(&cm->cm_prp_page_list);
callout_init_mtx(&cm->cm_callout, &sc->mpr_mtx, 0);
@ -2444,6 +2448,9 @@ mpr_intr_locked(void *data)
case MPI25_RPY_DESCRIPT_FLAGS_FAST_PATH_SCSI_IO_SUCCESS:
case MPI26_RPY_DESCRIPT_FLAGS_PCIE_ENCAPSULATED_SUCCESS:
cm = &sc->commands[le16toh(desc->SCSIIOSuccess.SMID)];
KASSERT(cm->cm_state == MPR_CM_STATE_INQUEUE,
("command not inqueue\n"));
cm->cm_state = MPR_CM_STATE_BUSY;
cm->cm_reply = NULL;
break;
case MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY:
@ -2513,6 +2520,9 @@ mpr_intr_locked(void *data)
} else {
cm = &sc->commands[
le16toh(desc->AddressReply.SMID)];
KASSERT(cm->cm_state == MPR_CM_STATE_INQUEUE,
("command not inqueue\n"));
cm->cm_state = MPR_CM_STATE_BUSY;
cm->cm_reply = reply;
cm->cm_reply_data =
le32toh(desc->AddressReply.
@ -2543,8 +2553,7 @@ mpr_intr_locked(void *data)
}
if (pq != sc->replypostindex) {
mpr_dprint(sc, MPR_TRACE,
"%s sc %p writing postindex %d\n",
mpr_dprint(sc, MPR_TRACE, "%s sc %p writing postindex %d\n",
__func__, sc, sc->replypostindex);
mpr_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET,
sc->replypostindex);

View File

@ -1161,6 +1161,10 @@ mprsas_complete_all_commands(struct mpr_softc *sc)
/* complete all commands with a NULL reply */
for (i = 1; i < sc->num_reqs; i++) {
cm = &sc->commands[i];
if (cm->cm_state == MPR_CM_STATE_FREE)
continue;
cm->cm_state = MPR_CM_STATE_BUSY;
cm->cm_reply = NULL;
completed = 0;
@ -1173,9 +1177,7 @@ mprsas_complete_all_commands(struct mpr_softc *sc)
cm, cm->cm_state, cm->cm_ccb);
cm->cm_complete(sc, cm);
completed = 1;
}
if (cm->cm_flags & MPR_CM_FLAGS_WAKEUP) {
} else if (cm->cm_flags & MPR_CM_FLAGS_WAKEUP) {
mprsas_log_command(cm, MPR_RECOVERY,
"waking up cm %p state %x ccb %p for diag reset\n",
cm, cm->cm_state, cm->cm_ccb);
@ -1183,9 +1185,6 @@ mprsas_complete_all_commands(struct mpr_softc *sc)
completed = 1;
}
if (cm->cm_sc->io_cmds_active != 0)
cm->cm_sc->io_cmds_active--;
if ((completed == 0) && (cm->cm_state != MPR_CM_STATE_FREE)) {
/* this should never happen, but if it does, log */
mprsas_log_command(cm, MPR_RECOVERY,
@ -1194,6 +1193,8 @@ mprsas_complete_all_commands(struct mpr_softc *sc)
cm->cm_ccb);
}
}
sc->io_cmds_active = 0;
}
void
@ -1248,6 +1249,11 @@ mprsas_tm_timeout(void *data)
mprsas_log_command(tm, MPR_INFO|MPR_RECOVERY, "task mgmt %p timed "
"out\n", tm);
KASSERT(tm->cm_state == MPR_CM_STATE_INQUEUE,
("command not inqueue\n"));
tm->cm_state = MPR_CM_STATE_BUSY;
mpr_reinit(sc);
}
@ -1657,7 +1663,7 @@ mprsas_scsiio_timeout(void *data)
* and been re-used, though this is unlikely.
*/
mpr_intr_locked(sc);
if (cm->cm_state == MPR_CM_STATE_FREE) {
if (cm->cm_state != MPR_CM_STATE_INQUEUE) {
mprsas_log_command(cm, MPR_XINFO,
"SCSI command %p almost timed out\n", cm);
return;
@ -2492,6 +2498,7 @@ mprsas_scsiio_complete(struct mpr_softc *sc, struct mpr_command *cm)
if (cm->cm_state == MPR_CM_STATE_TIMEDOUT) {
TAILQ_REMOVE(&cm->cm_targ->timedout_commands, cm, cm_recovery);
cm->cm_state = MPR_CM_STATE_BUSY;
if (cm->cm_reply != NULL)
mprsas_log_command(cm, MPR_RECOVERY,
"completed timedout cm %p ccb %p during recovery "

View File

@ -243,6 +243,7 @@ struct mpr_command {
#define MPR_CM_STATE_FREE 0
#define MPR_CM_STATE_BUSY 1
#define MPR_CM_STATE_TIMEDOUT 2
#define MPR_CM_STATE_INQUEUE 3
bus_dmamap_t cm_dmamap;
struct scsi_sense_data *cm_sense;
uint64_t *nvme_error_response;
@ -569,6 +570,8 @@ mpr_free_command(struct mpr_softc *sc, struct mpr_command *cm)
struct mpr_chain *chain, *chain_temp;
struct mpr_prp_page *prp_page, *prp_page_temp;
KASSERT(cm->cm_state == MPR_CM_STATE_BUSY, ("state not busy\n"));
if (cm->cm_reply != NULL)
mpr_free_reply(sc, cm->cm_reply_data);
cm->cm_reply = NULL;
@ -607,9 +610,10 @@ mpr_alloc_command(struct mpr_softc *sc)
if (cm == NULL)
return (NULL);
KASSERT(cm->cm_state == MPR_CM_STATE_FREE,
("mpr: Allocating busy command\n"));
TAILQ_REMOVE(&sc->req_list, cm, cm_link);
KASSERT(cm->cm_state == MPR_CM_STATE_FREE, ("mpr: Allocating busy "
"command\n"));
cm->cm_state = MPR_CM_STATE_BUSY;
return (cm);
}
@ -619,6 +623,8 @@ mpr_free_high_priority_command(struct mpr_softc *sc, struct mpr_command *cm)
{
struct mpr_chain *chain, *chain_temp;
KASSERT(cm->cm_state == MPR_CM_STATE_BUSY, ("state not busy\n"));
if (cm->cm_reply != NULL)
mpr_free_reply(sc, cm->cm_reply_data);
cm->cm_reply = NULL;
@ -645,9 +651,10 @@ mpr_alloc_high_priority_command(struct mpr_softc *sc)
if (cm == NULL)
return (NULL);
KASSERT(cm->cm_state == MPR_CM_STATE_FREE,
("mpr: Allocating busy command\n"));
TAILQ_REMOVE(&sc->high_priority_req_list, cm, cm_link);
KASSERT(cm->cm_state == MPR_CM_STATE_FREE, ("mpr: Allocating busy "
"command\n"));
cm->cm_state = MPR_CM_STATE_BUSY;
return (cm);
}

View File

@ -1102,6 +1102,10 @@ mps_enqueue_request(struct mps_softc *sc, struct mps_command *cm)
rd.u.low = cm->cm_desc.Words.Low;
rd.u.high = cm->cm_desc.Words.High;
rd.word = htole64(rd.word);
KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, ("command not busy\n"));
cm->cm_state = MPS_CM_STATE_INQUEUE;
/* TODO-We may need to make below regwrite atomic */
mps_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_LOW_OFFSET,
rd.u.low);
@ -1502,6 +1506,7 @@ mps_alloc_requests(struct mps_softc *sc)
cm->cm_sense_busaddr = sc->sense_busaddr + i * MPS_SENSE_LEN;
cm->cm_desc.Default.SMID = i;
cm->cm_sc = sc;
cm->cm_state = MPS_CM_STATE_BUSY;
TAILQ_INIT(&cm->cm_chain_list);
callout_init_mtx(&cm->cm_callout, &sc->mps_mtx, 0);
@ -2305,6 +2310,9 @@ mps_intr_locked(void *data)
switch (flags) {
case MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS:
cm = &sc->commands[le16toh(desc->SCSIIOSuccess.SMID)];
KASSERT(cm->cm_state == MPS_CM_STATE_INQUEUE,
("command not inqueue\n"));
cm->cm_state = MPS_CM_STATE_BUSY;
cm->cm_reply = NULL;
break;
case MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY:
@ -2340,7 +2348,7 @@ mps_intr_locked(void *data)
sc->reply_frames, sc->fqdepth,
sc->replyframesz);
printf("%s: baddr %#x,\n", __func__, baddr);
/* LSI-TODO. See Linux Code. Need Graceful exit*/
/* LSI-TODO. See Linux Code for Graceful exit */
panic("Reply address out of range");
}
if (le16toh(desc->AddressReply.SMID) == 0) {
@ -2372,10 +2380,14 @@ mps_intr_locked(void *data)
(MPI2_EVENT_NOTIFICATION_REPLY *)
reply);
} else {
cm = &sc->commands[le16toh(desc->AddressReply.SMID)];
cm = &sc->commands[
le16toh(desc->AddressReply.SMID)];
KASSERT(cm->cm_state == MPS_CM_STATE_INQUEUE,
("command not inqueue\n"));
cm->cm_state = MPS_CM_STATE_BUSY;
cm->cm_reply = reply;
cm->cm_reply_data =
le32toh(desc->AddressReply.ReplyFrameAddress);
cm->cm_reply_data = le32toh(
desc->AddressReply.ReplyFrameAddress);
}
break;
}
@ -2403,10 +2415,10 @@ mps_intr_locked(void *data)
}
if (pq != sc->replypostindex) {
mps_dprint(sc, MPS_TRACE,
"%s sc %p writing postindex %d\n",
mps_dprint(sc, MPS_TRACE, "%s sc %p writing postindex %d\n",
__func__, sc, sc->replypostindex);
mps_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, sc->replypostindex);
mps_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET,
sc->replypostindex);
}
return;

View File

@ -1101,6 +1101,10 @@ mpssas_complete_all_commands(struct mps_softc *sc)
/* complete all commands with a NULL reply */
for (i = 1; i < sc->num_reqs; i++) {
cm = &sc->commands[i];
if (cm->cm_state == MPS_CM_STATE_FREE)
continue;
cm->cm_state = MPS_CM_STATE_BUSY;
cm->cm_reply = NULL;
completed = 0;
@ -1109,14 +1113,12 @@ mpssas_complete_all_commands(struct mps_softc *sc)
if (cm->cm_complete != NULL) {
mpssas_log_command(cm, MPS_RECOVERY,
"completing cm %p state %x ccb %p for diag reset\n",
"completing cm %p state %x ccb %p for diag reset\n",
cm, cm->cm_state, cm->cm_ccb);
cm->cm_complete(sc, cm);
completed = 1;
}
if (cm->cm_flags & MPS_CM_FLAGS_WAKEUP) {
} else if (cm->cm_flags & MPS_CM_FLAGS_WAKEUP) {
mpssas_log_command(cm, MPS_RECOVERY,
"waking up cm %p state %x ccb %p for diag reset\n",
cm, cm->cm_state, cm->cm_ccb);
@ -1124,9 +1126,6 @@ mpssas_complete_all_commands(struct mps_softc *sc)
completed = 1;
}
if (cm->cm_sc->io_cmds_active != 0)
cm->cm_sc->io_cmds_active--;
if ((completed == 0) && (cm->cm_state != MPS_CM_STATE_FREE)) {
/* this should never happen, but if it does, log */
mpssas_log_command(cm, MPS_RECOVERY,
@ -1135,6 +1134,8 @@ mpssas_complete_all_commands(struct mps_softc *sc)
cm->cm_ccb);
}
}
sc->io_cmds_active = 0;
}
void
@ -1191,6 +1192,11 @@ mpssas_tm_timeout(void *data)
mpssas_log_command(tm, MPS_INFO|MPS_RECOVERY,
"task mgmt %p timed out\n", tm);
KASSERT(tm->cm_state == MPS_CM_STATE_INQUEUE,
("command not inqueue\n"));
tm->cm_state = MPS_CM_STATE_BUSY;
mps_reinit(sc);
}
@ -1591,7 +1597,7 @@ mpssas_scsiio_timeout(void *data)
* and been re-used, though this is unlikely.
*/
mps_intr_locked(sc);
if (cm->cm_state == MPS_CM_STATE_FREE) {
if (cm->cm_state != MPS_CM_STATE_INQUEUE) {
mpssas_log_command(cm, MPS_XINFO,
"SCSI command %p almost timed out\n", cm);
return;
@ -2031,12 +2037,13 @@ mpssas_scsiio_complete(struct mps_softc *sc, struct mps_command *cm)
if (cm->cm_state == MPS_CM_STATE_TIMEDOUT) {
TAILQ_REMOVE(&cm->cm_targ->timedout_commands, cm, cm_recovery);
cm->cm_state = MPS_CM_STATE_BUSY;
if (cm->cm_reply != NULL)
mpssas_log_command(cm, MPS_RECOVERY,
"completed timedout cm %p ccb %p during recovery "
"ioc %x scsi %x state %x xfer %u\n",
cm, cm->cm_ccb,
le16toh(rep->IOCStatus), rep->SCSIStatus, rep->SCSIState,
cm, cm->cm_ccb, le16toh(rep->IOCStatus),
rep->SCSIStatus, rep->SCSIState,
le32toh(rep->TransferCount));
else
mpssas_log_command(cm, MPS_RECOVERY,
@ -2047,8 +2054,8 @@ mpssas_scsiio_complete(struct mps_softc *sc, struct mps_command *cm)
mpssas_log_command(cm, MPS_RECOVERY,
"completed cm %p ccb %p during recovery "
"ioc %x scsi %x state %x xfer %u\n",
cm, cm->cm_ccb,
le16toh(rep->IOCStatus), rep->SCSIStatus, rep->SCSIState,
cm, cm->cm_ccb, le16toh(rep->IOCStatus),
rep->SCSIStatus, rep->SCSIState,
le32toh(rep->TransferCount));
else
mpssas_log_command(cm, MPS_RECOVERY,

View File

@ -243,6 +243,7 @@ struct mps_command {
#define MPS_CM_STATE_FREE 0
#define MPS_CM_STATE_BUSY 1
#define MPS_CM_STATE_TIMEDOUT 2
#define MPS_CM_STATE_INQUEUE 3
bus_dmamap_t cm_dmamap;
struct scsi_sense_data *cm_sense;
TAILQ_HEAD(, mps_chain) cm_chain_list;
@ -541,6 +542,8 @@ mps_free_command(struct mps_softc *sc, struct mps_command *cm)
{
struct mps_chain *chain, *chain_temp;
KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, ("state not busy\n"));
if (cm->cm_reply != NULL)
mps_free_reply(sc, cm->cm_reply_data);
cm->cm_reply = NULL;
@ -574,8 +577,10 @@ mps_alloc_command(struct mps_softc *sc)
if (cm == NULL)
return (NULL);
KASSERT(cm->cm_state == MPS_CM_STATE_FREE,
("mps: Allocating busy command\n"));
TAILQ_REMOVE(&sc->req_list, cm, cm_link);
KASSERT(cm->cm_state == MPS_CM_STATE_FREE, ("mps: Allocating busy command\n"));
cm->cm_state = MPS_CM_STATE_BUSY;
return (cm);
}
@ -585,6 +590,8 @@ mps_free_high_priority_command(struct mps_softc *sc, struct mps_command *cm)
{
struct mps_chain *chain, *chain_temp;
KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, ("state not busy\n"));
if (cm->cm_reply != NULL)
mps_free_reply(sc, cm->cm_reply_data);
cm->cm_reply = NULL;
@ -611,8 +618,10 @@ mps_alloc_high_priority_command(struct mps_softc *sc)
if (cm == NULL)
return (NULL);
KASSERT(cm->cm_state == MPS_CM_STATE_FREE,
("mps: Allocating busy command\n"));
TAILQ_REMOVE(&sc->high_priority_req_list, cm, cm_link);
KASSERT(cm->cm_state == MPS_CM_STATE_FREE, ("mps: Allocating busy command\n"));
cm->cm_state = MPS_CM_STATE_BUSY;
return (cm);
}