Fix mps deadlock when handling panic

During shutdown, mps waits for its SSU requests to complete. However, when
performing a reboot after handling a panic, the scheduler is stopped, so
getmicrotime(), which the wait loop uses to detect its timeout, can be
non-functional and the wait may never end.

Switch to the same method as shutdown_panic (counting a fixed number of
pause intervals instead of reading the clock) to ensure the wait actually
completes.

In addition, reduce the timeout from 60 seconds to 10 seconds when
RB_NOSYNC is set in howto, as we expect the wait to fail in that case.

Reviewed by:	slm
MFC after:	1 week
Sponsored by:	Multiplay
Differential Revision:	https://reviews.freebsd.org/D12776
This commit is contained in:
smh 2018-03-14 21:32:23 +00:00
parent 4890f838a2
commit 8d975f8d73
2 changed files with 21 additions and 14 deletions

View File

@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kthread.h>
#include <sys/taskqueue.h>
#include <sys/sbuf.h>
#include <sys/reboot.h>
#include <machine/bus.h>
#include <machine/resource.h>
@ -126,7 +127,7 @@ int mpssas_get_sas_address_for_sata_disk(struct mps_softc *sc,
u64 *sas_address, u16 handle, u32 device_info, u8 *is_SATA_SSD);
static int mpssas_volume_add(struct mps_softc *sc,
u16 handle);
static void mpssas_SSU_to_SATA_devices(struct mps_softc *sc);
static void mpssas_SSU_to_SATA_devices(struct mps_softc *sc, int howto);
static void mpssas_stop_unit_done(struct cam_periph *periph,
union ccb *done_ccb);
@ -1122,7 +1123,7 @@ out:
* Return nothing.
*/
static void
mpssas_SSU_to_SATA_devices(struct mps_softc *sc)
mpssas_SSU_to_SATA_devices(struct mps_softc *sc, int howto)
{
struct mpssas_softc *sassc = sc->sassc;
union ccb *ccb;
@ -1130,7 +1131,7 @@ mpssas_SSU_to_SATA_devices(struct mps_softc *sc)
target_id_t targetid;
struct mpssas_target *target;
char path_str[64];
struct timeval cur_time, start_time;
int timeout;
/*
* For each target, issue a StartStopUnit command to stop the device.
@ -1193,17 +1194,23 @@ mpssas_SSU_to_SATA_devices(struct mps_softc *sc)
}
/*
* Wait until all of the SSU commands have completed or time has
* expired (60 seconds). Pause for 100ms each time through. If any
* command times out, the target will be reset in the SCSI command
* timeout routine.
* Timeout after 60 seconds by default or 10 seconds if howto has
* RB_NOSYNC set which indicates we're likely handling a panic.
*/
getmicrotime(&start_time);
while (sc->SSU_refcount) {
timeout = 600;
if (howto & RB_NOSYNC)
timeout = 100;
/*
* Wait until all of the SSU commands have completed or timeout has
* expired. Pause for 100ms each time through. If any command
* times out, the target will be reset in the SCSI command timeout
* routine.
*/
while (sc->SSU_refcount > 0) {
pause("mpswait", hz/10);
getmicrotime(&cur_time);
if ((cur_time.tv_sec - start_time.tv_sec) > 60) {
if (--timeout == 0) {
mps_dprint(sc, MPS_FAULT, "Time has expired waiting "
"for SSU commands to complete.\n");
break;
@ -1245,7 +1252,7 @@ mpssas_stop_unit_done(struct cam_periph *periph, union ccb *done_ccb)
* Return nothing.
*/
void
mpssas_ir_shutdown(struct mps_softc *sc)
mpssas_ir_shutdown(struct mps_softc *sc, int howto)
{
u16 volume_mapping_flags;
u16 ioc_pg8_flags = le16toh(sc->ioc_pg8.Flags);
@ -1350,5 +1357,5 @@ out:
}
}
}
mpssas_SSU_to_SATA_devices(sc);
mpssas_SSU_to_SATA_devices(sc, howto);
}

View File

@ -772,7 +772,7 @@ int mps_config_get_volume_wwid(struct mps_softc *sc, u16 volume_handle,
int mps_config_get_raid_pd_pg0(struct mps_softc *sc,
Mpi2ConfigReply_t *mpi_reply, Mpi2RaidPhysDiskPage0_t *config_page,
u32 page_address);
void mpssas_ir_shutdown(struct mps_softc *sc);
void mpssas_ir_shutdown(struct mps_softc *sc, int howto);
int mps_reinit(struct mps_softc *sc);
void mpssas_handle_reinit(struct mps_softc *sc);