mpr: fix freeze / release mismatch in timeout code

If we are processing a timeout and have sent an ABORT to the firmware
for it, but have not yet received the firmware's response, and another
timeout occurs, we queue the new timeout and freeze the queue again.
However, once all of them have finally been processed, we only release
the queue once. This causes all I/O to halt, because the devq remains
frozen forever.

Instead, only freeze the queue when we start the process (e.g., when we
set INRESET on the target). This way the single release, done once all
the timed-out I/Os have finished ABORTing, balances the single freeze.

Sponsored by:		Netflix
Reviewed by:		mav
Differential Revision:	https://reviews.freebsd.org/D33054
This commit is contained in:
Warner Losh 2021-11-21 08:50:46 -07:00
parent 036af1053a
commit a8837c77ef

View File

@ -248,7 +248,8 @@ mprsas_free_tm(struct mpr_softc *sc, struct mpr_command *tm)
* INRESET flag as well or scsi I/O will not work.
*/
if (tm->cm_ccb) {
mpr_dprint(sc, MPR_XINFO, "Unfreezing devq for target ID %d\n",
mpr_dprint(sc, MPR_XINFO | MPR_RECOVERY,
"Unfreezing devq for target ID %d\n",
tm->cm_targ->tid);
tm->cm_targ->flags &= ~MPRSAS_TARGET_INRESET;
xpt_release_devq(tm->cm_ccb->ccb_h.path, 1, TRUE);
@ -1924,6 +1925,9 @@ mprsas_action_scsiio(struct mprsas_softc *sassc, union ccb *ccb)
*/
if (targ->flags & MPRSAS_TARGET_INRESET) {
ccb->ccb_h.status = CAM_REQUEUE_REQ | CAM_DEV_QFRZN;
mpr_dprint(sc, MPR_XINFO | MPR_RECOVERY,
"%s: Freezing devq for target ID %d\n",
__func__, targ->tid);
xpt_freeze_devq(ccb->ccb_h.path, 1);
xpt_done(ccb);
return;
@ -2513,8 +2517,8 @@ mprsas_scsiio_complete(struct mpr_softc *sc, struct mpr_command *cm)
if ((sassc->flags & MPRSAS_QUEUE_FROZEN) == 0) {
xpt_freeze_simq(sassc->sim, 1);
sassc->flags |= MPRSAS_QUEUE_FROZEN;
mpr_dprint(sc, MPR_XINFO, "Error sending command, "
"freezing SIM queue\n");
mpr_dprint(sc, MPR_XINFO | MPR_RECOVERY,
"Error sending command, freezing SIM queue\n");
}
}
@ -2549,7 +2553,7 @@ mprsas_scsiio_complete(struct mpr_softc *sc, struct mpr_command *cm)
if (sassc->flags & MPRSAS_QUEUE_FROZEN) {
ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
sassc->flags &= ~MPRSAS_QUEUE_FROZEN;
mpr_dprint(sc, MPR_XINFO,
mpr_dprint(sc, MPR_XINFO | MPR_RECOVERY,
"Unfreezing SIM queue\n");
}
}
@ -2817,7 +2821,7 @@ mprsas_scsiio_complete(struct mpr_softc *sc, struct mpr_command *cm)
if (sassc->flags & MPRSAS_QUEUE_FROZEN) {
ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
sassc->flags &= ~MPRSAS_QUEUE_FROZEN;
mpr_dprint(sc, MPR_XINFO, "Command completed, unfreezing SIM "
mpr_dprint(sc, MPR_INFO, "Command completed, unfreezing SIM "
"queue\n");
}
@ -3425,6 +3429,11 @@ mprsas_async(void *callback_arg, uint32_t code, struct cam_path *path,
* the target until the reset has completed. The CCB holds the path which
* is used to release the devq. The devq is released and the CCB is freed
* when the TM completes.
* We only need to do this when we're entering reset, not each time we
* need to send an abort (which will happen if multiple commands time out
* while we're sending the abort). We do not release the queue for each
* command we complete (just at the end when we free the tm), so freezing
* it each time doesn't make sense.
*/
void
mprsas_prepare_for_tm(struct mpr_softc *sc, struct mpr_command *tm,
@ -3440,13 +3449,15 @@ mprsas_prepare_for_tm(struct mpr_softc *sc, struct mpr_command *tm,
target->tid, lun_id) != CAM_REQ_CMP) {
xpt_free_ccb(ccb);
} else {
mpr_dprint(sc, MPR_XINFO,
"%s: Freezing devq for target ID %d\n",
__func__, target->tid);
xpt_freeze_devq(ccb->ccb_h.path, 1);
tm->cm_ccb = ccb;
tm->cm_targ = target;
target->flags |= MPRSAS_TARGET_INRESET;
if ((target->flags & MPRSAS_TARGET_INRESET) == 0) {
mpr_dprint(sc, MPR_XINFO | MPR_RECOVERY,
"%s: Freezing devq for target ID %d\n",
__func__, target->tid);
xpt_freeze_devq(ccb->ccb_h.path, 1);
target->flags |= MPRSAS_TARGET_INRESET;
}
}
}
}