Close a race in the isp(4) driver that caused devices to disappear
and not automatically come back if they were gone for a short
period of time.

The isp(4) driver has a 30 second gone device timer that gets
activated whenever a device goes away.  If the device comes back
before the timer expires, we don't send a notification to CAM that
it has gone away.  If, however, there is a command sent to the
device while it is gone and before it comes back, the isp(4) driver
sends the command back with CAM_SEL_TIMEOUT status.

CAM responds to the CAM_SEL_TIMEOUT status by removing the device.
In the case where a device comes back within the 30 second gone
device timer window, though, we weren't telling CAM the device
came back.

So, fix this by tracking whether we have told CAM the device is
gone, and if we have, send a rescan if it comes back within the 30
second window.

ispvar.h:
	In the fcportdb_t structure, add a new bitfield,
	reported_gone.  This gets set whenever we return a command
	with CAM_SEL_TIMEOUT status on a Fibre Channel device.

isp_freebsd.c:
	In isp_done(), if we're sending CAM_SEL_TIMEOUT for a
	command sent to a FC device, set the reported_gone bit.

	In isp_async(), in the ISPASYNC_DEV_STAYED case, rescan the
	device in question if it is mapped to a target ID and has
	been reported gone.

	In isp_make_here(), take a port database entry argument,
	and clear the reported_gone bit when we send a rescan to
	CAM.

	In isp_make_gone(), take a port database entry as an
	argument, and set the reported_gone bit when we send an
	async event telling CAM consumers that the device is gone.

Sponsored by:	Spectra Logic
MFC after:	1 week
This commit is contained in:
Kenneth D. Merry 2015-01-08 17:51:12 +00:00
parent 7259906eb0
commit 5704e6f06c
2 changed files with 44 additions and 7 deletions

View File

@ -4565,7 +4565,7 @@ isp_watchdog(void *arg)
}
static void
isp_make_here(ispsoftc_t *isp, int chan, int tgt)
isp_make_here(ispsoftc_t *isp, fcportdb_t *fcp, int chan, int tgt)
{
union ccb *ccb;
struct isp_fc *fc = ISP_FC_PC(isp, chan);
@ -4588,11 +4588,18 @@ isp_make_here(ispsoftc_t *isp, int chan, int tgt)
xpt_free_ccb(ccb);
return;
}
/*
* Since we're about to issue a rescan, mark this device as not
* reported gone.
*/
fcp->reported_gone = 0;
xpt_rescan(ccb);
}
static void
isp_make_gone(ispsoftc_t *isp, int chan, int tgt)
isp_make_gone(ispsoftc_t *isp, fcportdb_t *fcp, int chan, int tgt)
{
struct cam_path *tp;
struct isp_fc *fc = ISP_FC_PC(isp, chan);
@ -4601,6 +4608,11 @@ isp_make_gone(ispsoftc_t *isp, int chan, int tgt)
return;
}
if (xpt_create_path(&tp, NULL, cam_sim_path(fc->sim), tgt, CAM_LUN_WILDCARD) == CAM_REQ_CMP) {
/*
* We're about to send out the lost device async
* notification, so indicate that we have reported it gone.
*/
fcp->reported_gone = 1;
xpt_async(AC_LOST_DEVICE, tp, NULL);
xpt_free_path(tp);
}
@ -4654,7 +4666,7 @@ isp_gdt_task(void *arg, int pending)
lp->dev_map_idx = 0;
lp->state = FC_PORTDB_STATE_NIL;
isp_prt(isp, ISP_LOGCONFIG, prom3, chan, lp->portid, tgt, "Gone Device Timeout");
isp_make_gone(isp, chan, tgt);
isp_make_gone(isp, lp, chan, tgt);
}
if (fc->ready) {
if (more_to_do) {
@ -4747,7 +4759,7 @@ isp_ldt_task(void *arg, int pending)
lp->dev_map_idx = 0;
lp->state = FC_PORTDB_STATE_NIL;
isp_prt(isp, ISP_LOGCONFIG, prom3, chan, lp->portid, tgt, "Loop Down Timeout");
isp_make_gone(isp, chan, tgt);
isp_make_gone(isp, lp, chan, tgt);
}
if (FCPARAM(isp, chan)->role & ISP_ROLE_INITIATOR) {
@ -5561,6 +5573,21 @@ isp_done(XS_T *sccb)
if (status != CAM_REQ_CMP) {
if (status != CAM_SEL_TIMEOUT)
isp_prt(isp, ISP_LOGDEBUG0, "target %d lun %d CAM status 0x%x SCSI status 0x%x", XS_TGT(sccb), XS_LUN(sccb), sccb->ccb_h.status, sccb->scsi_status);
else if ((IS_FC(isp))
&& (XS_TGT(sccb) < MAX_FC_TARG)) {
fcparam *fcp;
int hdlidx;
fcp = FCPARAM(isp, XS_CHANNEL(sccb));
hdlidx = fcp->isp_dev_map[XS_TGT(sccb)] - 1;
/*
* Note that we have reported that this device is
* gone. If it reappears, we'll need to issue a
* rescan.
*/
if (hdlidx > 0 && hdlidx < MAX_FC_TARG)
fcp->portdb[hdlidx].reported_gone = 1;
}
if ((sccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
sccb->ccb_h.status |= CAM_DEV_QFRZN;
xpt_freeze_devq(sccb->ccb_h.path, 1);
@ -5750,7 +5777,7 @@ isp_async(ispsoftc_t *isp, ispasync_t cmd, ...)
tgt = lp->dev_map_idx - 1;
isp_prt(isp, ISP_LOGCONFIG, prom2, bus, lp->portid, lp->handle, buf, "arrived at", tgt, (uint32_t) (lp->port_wwn >> 32), (uint32_t) lp->port_wwn);
isp_fcp_reset_crn(fc, tgt, /*tgt_set*/ 1);
isp_make_here(isp, bus, tgt);
isp_make_here(isp, lp, bus, tgt);
} else {
isp_prt(isp, ISP_LOGCONFIG, prom0, bus, lp->portid, lp->handle, buf, "arrived", (uint32_t) (lp->port_wwn >> 32), (uint32_t) lp->port_wwn);
}
@ -5770,7 +5797,7 @@ isp_async(ispsoftc_t *isp, ispasync_t cmd, ...)
FCPARAM(isp, bus)->isp_dev_map[tgt] = 0;
lp->dev_map_idx = 0;
isp_prt(isp, ISP_LOGCONFIG, prom3, bus, lp->portid, tgt, "change is bad");
isp_make_gone(isp, bus, tgt);
isp_make_gone(isp, lp, bus, tgt);
} else {
isp_gen_role_str(buf, sizeof (buf), lp->prli_word3);
isp_prt(isp, ISP_LOGCONFIG, prom0, bus, lp->portid, lp->handle, buf, "changed and departed",
@ -5803,6 +5830,15 @@ isp_async(ispsoftc_t *isp, ispasync_t cmd, ...)
tgt = lp->dev_map_idx - 1;
isp_prt(isp, ISP_LOGCONFIG, prom2, bus, lp->portid, lp->handle, buf, "stayed at", tgt,
(uint32_t) (lp->port_wwn >> 32), (uint32_t) lp->port_wwn);
/*
* Only issue a rescan if we've actually reported
* that this device is gone.
*/
if (lp->reported_gone != 0) {
isp_prt(isp, ISP_LOGCONFIG, prom2, bus, lp->portid, lp->handle, buf, "rescanned at", tgt,
(uint32_t) (lp->port_wwn >> 32), (uint32_t) lp->port_wwn);
isp_make_here(isp, lp, bus, tgt);
}
} else {
isp_prt(isp, ISP_LOGCONFIG, prom0, bus, lp->portid, lp->handle, buf, "stayed",
(uint32_t) (lp->port_wwn >> 32), (uint32_t) lp->port_wwn);

View File

@ -421,7 +421,8 @@ typedef struct {
target_mode : 1,
portid : 24;
uint32_t
: 6,
: 5,
reported_gone : 1,
announced : 1,
dirty : 1, /* commands have been run */
new_portid : 24;