Fix the mly driver! If card resources became unavailable, the driver would
correctly tell CAM to requeue the command and then freeze its queue. The problem was that when resources became available again, it wouldn't tell CAM to unfreeze its queue, so no more commands would ever be delivered. This is similar to the bug that was fixed in the cciss driver last year. This is a bug in 4-STABLE also, but is probably masked by the OS being fast enough to drain the completion queue before it fills up. Also add some diagnostics available when compiled with MLY_DEBUG. Thanks very much to LSI Corp for donating equipment to track this down, and Vaidus Damosevicius for pestering me long enough to get it fixed.
This commit is contained in:
parent
e7fb1825df
commit
f0e4158cff
@ -121,6 +121,7 @@ static void mly_print_packet(struct mly_command *mc);
|
||||
static void mly_panic(struct mly_softc *sc, char *reason);
|
||||
#endif
|
||||
void mly_print_controller(int controller);
|
||||
static int mly_timeout(struct mly_softc *sc);
|
||||
|
||||
|
||||
static d_open_t mly_user_open;
|
||||
@ -129,6 +130,7 @@ static d_ioctl_t mly_user_ioctl;
|
||||
static int mly_user_command(struct mly_softc *sc, struct mly_user_command *uc);
|
||||
static int mly_user_health(struct mly_softc *sc, struct mly_user_health *uh);
|
||||
|
||||
#define MLY_CMD_TIMEOUT 20
|
||||
|
||||
static device_method_t mly_methods[] = {
|
||||
/* Device interface */
|
||||
@ -330,6 +332,10 @@ mly_attach(device_t dev)
|
||||
/* enable interrupts now */
|
||||
MLY_UNMASK_INTERRUPTS(sc);
|
||||
|
||||
#ifdef MLY_DEBUG
|
||||
timeout((timeout_t *)mly_timeout, sc, MLY_CMD_TIMEOUT * hz);
|
||||
#endif
|
||||
|
||||
out:
|
||||
if (error != 0)
|
||||
mly_free(sc);
|
||||
@ -1473,6 +1479,10 @@ mly_start(struct mly_command *mc)
|
||||
mly_map_command(mc);
|
||||
mc->mc_packet->generic.command_id = mc->mc_slot;
|
||||
|
||||
#ifdef MLY_DEBUG
|
||||
mc->mc_timestamp = time_second;
|
||||
#endif
|
||||
|
||||
s = splcam();
|
||||
|
||||
/*
|
||||
@ -2173,6 +2183,7 @@ mly_cam_action_io(struct cam_sim *sim, struct ccb_scsiio *csio)
|
||||
struct mly_command_scsi_small *ss;
|
||||
int bus, target;
|
||||
int error;
|
||||
int s;
|
||||
|
||||
bus = cam_sim_bus(sim);
|
||||
target = csio->ccb_h.target_id;
|
||||
@ -2231,8 +2242,11 @@ mly_cam_action_io(struct cam_sim *sim, struct ccb_scsiio *csio)
|
||||
* Get a command, or push the ccb back to CAM and freeze the queue.
|
||||
*/
|
||||
if ((error = mly_alloc_command(sc, &mc))) {
|
||||
s = splcam();
|
||||
xpt_freeze_simq(sim, 1);
|
||||
csio->ccb_h.status |= CAM_REQUEUE_REQ;
|
||||
sc->mly_qfrzn_cnt++;
|
||||
splx(s);
|
||||
return(error);
|
||||
}
|
||||
|
||||
@ -2276,8 +2290,11 @@ mly_cam_action_io(struct cam_sim *sim, struct ccb_scsiio *csio)
|
||||
|
||||
/* give the command to the controller */
|
||||
if ((error = mly_start(mc))) {
|
||||
s = splcam();
|
||||
xpt_freeze_simq(sim, 1);
|
||||
csio->ccb_h.status |= CAM_REQUEUE_REQ;
|
||||
sc->mly_qfrzn_cnt++;
|
||||
splx(s);
|
||||
return(error);
|
||||
}
|
||||
|
||||
@ -2309,6 +2326,7 @@ mly_cam_complete(struct mly_command *mc)
|
||||
struct mly_btl *btl;
|
||||
u_int8_t cmd;
|
||||
int bus, target;
|
||||
int s;
|
||||
|
||||
debug_called(2);
|
||||
|
||||
@ -2360,6 +2378,14 @@ mly_cam_complete(struct mly_command *mc)
|
||||
csio->ccb_h.status = CAM_REQ_CMP_ERR;
|
||||
break;
|
||||
}
|
||||
|
||||
s = splcam();
|
||||
if (sc->mly_qfrzn_cnt) {
|
||||
csio->ccb_h.status |= CAM_RELEASE_SIMQ;
|
||||
sc->mly_qfrzn_cnt--;
|
||||
}
|
||||
splx(s);
|
||||
|
||||
xpt_done((union ccb *)csio);
|
||||
mly_release_command(mc);
|
||||
}
|
||||
@ -2945,3 +2971,23 @@ mly_user_health(struct mly_softc *sc, struct mly_user_health *uh)
|
||||
sizeof(uh->HealthStatusBuffer));
|
||||
return(error);
|
||||
}
|
||||
|
||||
static int
|
||||
mly_timeout(struct mly_softc *sc)
|
||||
{
|
||||
struct mly_command *mc;
|
||||
int deadline;
|
||||
|
||||
deadline = time_second - MLY_CMD_TIMEOUT;
|
||||
TAILQ_FOREACH(mc, &sc->mly_busy, mc_link) {
|
||||
if ((mc->mc_timestamp < deadline)) {
|
||||
device_printf(sc->mly_dev,
|
||||
"COMMAND %p TIMEOUT AFTER %d SECONDS\n", mc,
|
||||
(int)(time_second - mc->mc_timestamp));
|
||||
}
|
||||
}
|
||||
|
||||
timeout((timeout_t *)mly_timeout, sc, MLY_CMD_TIMEOUT * hz);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
@ -148,6 +148,7 @@ struct mly_command {
|
||||
void (* mc_complete)(struct mly_command *mc); /* completion handler */
|
||||
void *mc_private; /* caller-private data */
|
||||
|
||||
int mc_timestamp;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -237,6 +238,7 @@ struct mly_softc {
|
||||
/* command-completion task */
|
||||
struct task mly_task_complete; /* deferred-completion task */
|
||||
#endif
|
||||
int mly_qfrzn_cnt; /* Track simq freezes */
|
||||
};
|
||||
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user