The "fw_outstanding" (outstanding IOs at firmware level) counter becomes inaccurate when R1 fastpath

writes are running. Some of the cases that are not handled properly in the driver are:

1. With R1 fastpath supported, a single write from the CAM layer can consume 2 MPT frames
at the driver/firmware level for fastpath qualification (if fw_outstanding < controller Queue Depth).
Because of this, the driver has to throttle IOs coming from the CAM layer, as well as the second fastpath
write (of an R1 write), against the adapter Queue Depth.
If "fw_outstanding" reaches the adapter queue depth, the driver should return IOs from the CAM layer with
device busy status. While allocating the second MPT frame (corresponding to the R1 FP write), the driver
should also ensure that fw_outstanding does not exceed the adapter QD.

2. On R1 fastpath write completion, the driver decrements the "fw_outstanding" counter without
actually returning the MPT frame to the free pool. Under heavy IO that consumes the whole
adapter Queue Depth, this may cause IOs to consume the MPT frames reserved for DCMDs (management commands), and
DCMDs (internal and sent by applications) that cannot get an MPT frame will start failing.

Below is one test case that reproduces the issue described above:
1. Run heavy IOs (outstanding IOs should hit the adapter Queue Depth).
2. Run a management tool (Broadcom's storcli tool) querying the adapter in a loop (e.g. run "storcli64 /c0 show" in a loop).
3. The management tool's requests start failing because no free MPT frames are available; all frames have been consumed by IOs.

Fix: Increment/decrement of "fw_outstanding" counter should be in sync with MPT frame get/return.

Submitted by: Sumit Saxena <sumit.saxena@broadcom.com>
Reviewed by:  Kashyap Desai <Kashyap.Desai@broadcom.com>
Approved by:  Ken
MFC after:  3 days
Sponsored by:   Broadcom Inc
This commit is contained in:
Kashyap D Desai 2019-03-12 09:24:58 +00:00
parent 5654a00747
commit 5437c8b88e
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=345056
2 changed files with 29 additions and 9 deletions

View File

@ -1712,6 +1712,7 @@ mrsas_complete_cmd(struct mrsas_softc *sc, u_int32_t MSIxIndex)
mrsas_map_mpt_cmd_status(cmd_mpt, cmd_mpt->ccb_ptr, status,
extStatus, data_length, sense);
mrsas_cmd_done(sc, cmd_mpt);
mrsas_atomic_dec(&sc->fw_outstanding);
} else {
/*
* If the peer Raid 1/10 fast path failed,
@ -1735,12 +1736,13 @@ mrsas_complete_cmd(struct mrsas_softc *sc, u_int32_t MSIxIndex)
r1_cmd->callout_owner = false;
}
mrsas_release_mpt_cmd(r1_cmd);
mrsas_atomic_dec(&sc->fw_outstanding);
mrsas_map_mpt_cmd_status(cmd_mpt, cmd_mpt->ccb_ptr, status,
extStatus, data_length, sense);
mrsas_cmd_done(sc, cmd_mpt);
mrsas_atomic_dec(&sc->fw_outstanding);
}
}
mrsas_atomic_dec(&sc->fw_outstanding);
break;
case MRSAS_MPI2_FUNCTION_PASSTHRU_IO_REQUEST: /* MFI command */
cmd_mfi = sc->mfi_cmd_list[cmd_mpt->sync_cmd_idx];
@ -2526,6 +2528,9 @@ mrsas_init_fw(struct mrsas_softc *sc)
else
sc->fast_path_io = 0;
}
device_printf(sc->mrsas_dev, "max_fw_cmds: %u max_scsi_cmds: %u\n",
sc->max_fw_cmds, sc->max_scsi_cmds);
return (0);
}

View File

@ -467,11 +467,20 @@ mrsas_startio(struct mrsas_softc *sc, struct cam_sim *sim,
return (0);
}
ccb_h->status |= CAM_SIM_QUEUED;
if (mrsas_atomic_inc_return(&sc->fw_outstanding) > sc->max_scsi_cmds) {
ccb_h->status |= CAM_REQUEUE_REQ;
xpt_done(ccb);
mrsas_atomic_dec(&sc->fw_outstanding);
return (0);
}
cmd = mrsas_get_mpt_cmd(sc);
if (!cmd) {
ccb_h->status |= CAM_REQUEUE_REQ;
xpt_done(ccb);
mrsas_atomic_dec(&sc->fw_outstanding);
return (0);
}
@ -638,7 +647,7 @@ mrsas_startio(struct mrsas_softc *sc, struct cam_sim *sim,
mrsas_scsiio_timeout, cmd);
#endif
if (mrsas_atomic_inc_return(&sc->fw_outstanding) > sc->io_cmds_highwater)
if (mrsas_atomic_read(&sc->fw_outstanding) > sc->io_cmds_highwater)
sc->io_cmds_highwater++;
/*
@ -653,7 +662,6 @@ mrsas_startio(struct mrsas_softc *sc, struct cam_sim *sim,
* new command
*/
if (cmd->r1_alt_dev_handle != MR_DEVHANDLE_INVALID) {
mrsas_atomic_inc(&sc->fw_outstanding);
mrsas_prepare_secondRaid1_IO(sc, cmd);
mrsas_fire_cmd(sc, req_desc->addr.u.low,
req_desc->addr.u.high);
@ -669,6 +677,7 @@ mrsas_startio(struct mrsas_softc *sc, struct cam_sim *sim,
done:
xpt_done(ccb);
mrsas_atomic_dec(&sc->fw_outstanding);
return (0);
}
@ -1092,14 +1101,20 @@ mrsas_setup_io(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd,
(io_info.r1_alt_dev_handle != MR_DEVHANDLE_INVALID) &&
(raid->level == 1) && !io_info.isRead) {
r1_cmd = mrsas_get_mpt_cmd(sc);
if (!r1_cmd) {
if (mrsas_atomic_inc_return(&sc->fw_outstanding) > sc->max_scsi_cmds) {
fp_possible = FALSE;
printf("Avago debug fp disable from %s %d \n",
__func__, __LINE__);
mrsas_atomic_dec(&sc->fw_outstanding);
} else {
cmd->peer_cmd = r1_cmd;
r1_cmd->peer_cmd = cmd;
}
r1_cmd = mrsas_get_mpt_cmd(sc);
if (!r1_cmd) {
fp_possible = FALSE;
mrsas_atomic_dec(&sc->fw_outstanding);
}
else {
cmd->peer_cmd = r1_cmd;
r1_cmd->peer_cmd = cmd;
}
}
}
}