Wait until completion context unwinds before retrying CCBs that have been
queued internally. This works around issue in the isci HAL where it cannot accept new I/O to a device after a resetting->ready state transition until the completion context has unwound. This issue was found by submitting non-tagged CCBs through pass(4) interface to a SATA disk with an extremely small timeout value (5ms). This would trigger internal resets with I/O in the isci(4) internal queues. The small timeout value had not been intentional (and original reporter has since changed his test to use 5sec instead), but it did uncover this corner case that would result in a hung disk. Sponsored by: Intel Reported and tested by: Ravi Pokala <rpokala at panasas dot com> Reviewed by: scottl (earlier version) MFC after: 1 week
This commit is contained in:
parent
8d75cfbaa6
commit
1acd03e35e
@ -30,6 +30,9 @@
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _ISCI_H
|
||||
#define _ISCI_H
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/kernel.h>
|
||||
@ -86,7 +89,31 @@ struct ISCI_REMOTE_DEVICE {
|
||||
BOOL is_resetting;
|
||||
uint32_t frozen_lun_mask;
|
||||
SCI_FAST_LIST_ELEMENT_T pending_device_reset_element;
|
||||
|
||||
/*
|
||||
* This queue maintains CCBs that have been returned with
|
||||
* SCI_IO_FAILURE_INVALID_STATE from the SCI layer. These CCBs
|
||||
* need to be retried, but we cannot return CAM_REQUEUE_REQ because
|
||||
* this status gets passed all the way back up to users of the pass(4)
|
||||
* interface and breaks things like smartctl. So instead, we queue
|
||||
* these CCBs internally.
|
||||
*/
|
||||
TAILQ_HEAD(,ccb_hdr) queued_ccbs;
|
||||
|
||||
/*
|
||||
* Marker denoting this remote device needs its first queued ccb to
|
||||
* be retried.
|
||||
*/
|
||||
BOOL release_queued_ccb;
|
||||
|
||||
/*
|
||||
* Points to a CCB in the queue that is currently being processed by
|
||||
* SCIL. This allows us to keep in flight CCBs in the queue so as to
|
||||
* maintain ordering (i.e. in case we retry an I/O and then find out
|
||||
* it needs to be retried again - it just keeps its same place in the
|
||||
* queue.
|
||||
*/
|
||||
union ccb * queued_ccb_in_progress;
|
||||
};
|
||||
|
||||
struct ISCI_DOMAIN {
|
||||
@ -126,6 +153,7 @@ struct ISCI_CONTROLLER
|
||||
BOOL has_been_scanned;
|
||||
uint32_t initial_discovery_mask;
|
||||
BOOL is_frozen;
|
||||
BOOL release_queued_ccbs;
|
||||
uint8_t *remote_device_memory;
|
||||
struct ISCI_MEMORY cached_controller_memory;
|
||||
struct ISCI_MEMORY uncached_controller_memory;
|
||||
@ -291,6 +319,8 @@ int isci_controller_attach_to_cam(struct ISCI_CONTROLLER *controller);
|
||||
|
||||
void isci_controller_start(void *controller);
|
||||
|
||||
void isci_controller_release_queued_ccbs(struct ISCI_CONTROLLER *controller);
|
||||
|
||||
void isci_domain_construct(struct ISCI_DOMAIN *domain, uint32_t domain_index,
|
||||
struct ISCI_CONTROLLER *controller);
|
||||
|
||||
@ -301,3 +331,5 @@ void isci_log_message(uint32_t verbosity, char *log_message_prefix,
|
||||
char *log_message, ...);
|
||||
|
||||
extern uint32_t g_isci_debug_level;
|
||||
|
||||
#endif /* #ifndef _ISCI_H */
|
||||
|
@ -201,6 +201,7 @@ void isci_controller_construct(struct ISCI_CONTROLLER *controller,
|
||||
|
||||
controller->is_started = FALSE;
|
||||
controller->is_frozen = FALSE;
|
||||
controller->release_queued_ccbs = FALSE;
|
||||
controller->sim = NULL;
|
||||
controller->initial_discovery_mask = 0;
|
||||
|
||||
@ -431,6 +432,8 @@ int isci_controller_allocate_memory(struct ISCI_CONTROLLER *controller)
|
||||
sci_fast_list_element_init(remote_device,
|
||||
&remote_device->pending_device_reset_element);
|
||||
TAILQ_INIT(&remote_device->queued_ccbs);
|
||||
remote_device->release_queued_ccb = FALSE;
|
||||
remote_device->queued_ccb_in_progress = NULL;
|
||||
|
||||
/*
|
||||
* For the first SCI_MAX_DOMAINS device objects, do not put
|
||||
@ -694,3 +697,47 @@ void isci_action(struct cam_sim *sim, union ccb *ccb)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Unfortunately, SCIL doesn't cleanly handle retry conditions.
|
||||
* CAM_REQUEUE_REQ works only when no one is using the pass(4) interface. So
|
||||
* when SCIL denotes an I/O needs to be retried (typically because of mixing
|
||||
* tagged/non-tagged ATA commands, or running out of NCQ slots), we queue
|
||||
* these I/O internally. Once SCIL completes an I/O to this device, or we get
|
||||
* a ready notification, we will retry the first I/O on the queue.
|
||||
* Unfortunately, SCIL also doesn't cleanly handle starting the new I/O within
|
||||
* the context of the completion handler, so we need to retry these I/O after
|
||||
* the completion handler is done executing.
|
||||
*/
|
||||
void
|
||||
isci_controller_release_queued_ccbs(struct ISCI_CONTROLLER *controller)
|
||||
{
|
||||
struct ISCI_REMOTE_DEVICE *dev;
|
||||
struct ccb_hdr *ccb_h;
|
||||
int dev_idx;
|
||||
|
||||
KASSERT(mtx_owned(&controller->lock), ("controller lock not owned"));
|
||||
|
||||
controller->release_queued_ccbs = FALSE;
|
||||
for (dev_idx = 0;
|
||||
dev_idx < SCI_MAX_REMOTE_DEVICES;
|
||||
dev_idx++) {
|
||||
|
||||
dev = controller->remote_device[dev_idx];
|
||||
if (dev != NULL &&
|
||||
dev->release_queued_ccb == TRUE &&
|
||||
dev->queued_ccb_in_progress == NULL) {
|
||||
dev->release_queued_ccb = FALSE;
|
||||
ccb_h = TAILQ_FIRST(&dev->queued_ccbs);
|
||||
|
||||
if (ccb_h == NULL)
|
||||
continue;
|
||||
|
||||
isci_log_message(1, "ISCI", "release %p %x\n", ccb_h,
|
||||
((union ccb *)ccb_h)->csio.cdb_io.cdb_bytes[0]);
|
||||
|
||||
dev->queued_ccb_in_progress = (union ccb *)ccb_h;
|
||||
isci_io_request_execute_scsi_io(
|
||||
(union ccb *)ccb_h, controller);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -177,6 +177,9 @@ isci_interrupt_legacy_handler(void *arg)
|
||||
if (interrupt_handler(scic_controller_handle)) {
|
||||
mtx_lock(&controller->lock);
|
||||
completion_handler(scic_controller_handle);
|
||||
if (controller->release_queued_ccbs == TRUE)
|
||||
isci_controller_release_queued_ccbs(
|
||||
controller);
|
||||
mtx_unlock(&controller->lock);
|
||||
}
|
||||
}
|
||||
@ -204,6 +207,13 @@ isci_interrupt_msix_handler(void *arg)
|
||||
if (interrupt_handler(scic_controller_handle)) {
|
||||
mtx_lock(&controller->lock);
|
||||
completion_handler(scic_controller_handle);
|
||||
/*
|
||||
* isci_controller_release_queued_ccb() is a relatively
|
||||
* expensive routine, so we don't call it until the controller
|
||||
* level flag is set to TRUE.
|
||||
*/
|
||||
if (controller->release_queued_ccbs == TRUE)
|
||||
isci_controller_release_queued_ccbs(controller);
|
||||
mtx_unlock(&controller->lock);
|
||||
}
|
||||
}
|
||||
|
@ -223,7 +223,7 @@ isci_io_request_complete(SCI_CONTROLLER_HANDLE_T scif_controller,
|
||||
(struct ISCI_REQUEST *)isci_request);
|
||||
|
||||
if (complete_ccb) {
|
||||
if (ccb->ccb_h.status != CAM_REQ_CMP) {
|
||||
if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
|
||||
/* ccb will be completed with some type of non-success
|
||||
* status. So temporarily freeze the queue until the
|
||||
* upper layers can act on the status. The
|
||||
@ -234,6 +234,26 @@ isci_io_request_complete(SCI_CONTROLLER_HANDLE_T scif_controller,
|
||||
xpt_freeze_devq(ccb->ccb_h.path, 1);
|
||||
}
|
||||
|
||||
if (ccb->ccb_h.status & CAM_SIM_QUEUED) {
|
||||
|
||||
KASSERT(ccb == isci_remote_device->queued_ccb_in_progress,
|
||||
("multiple internally queued ccbs in flight"));
|
||||
|
||||
TAILQ_REMOVE(&isci_remote_device->queued_ccbs,
|
||||
&ccb->ccb_h, sim_links.tqe);
|
||||
ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
|
||||
|
||||
/*
|
||||
* This CCB that was in the queue was completed, so
|
||||
* set the in_progress pointer to NULL denoting that
|
||||
* we can retry another CCB from the queue. We only
|
||||
* allow one CCB at a time from the queue to be
|
||||
* in progress so that we can effectively maintain
|
||||
* ordering.
|
||||
*/
|
||||
isci_remote_device->queued_ccb_in_progress = NULL;
|
||||
}
|
||||
|
||||
if (isci_remote_device->frozen_lun_mask != 0) {
|
||||
isci_remote_device_release_device_queue(isci_remote_device);
|
||||
}
|
||||
@ -248,11 +268,30 @@ isci_io_request_complete(SCI_CONTROLLER_HANDLE_T scif_controller,
|
||||
isci_remote_device_freeze_lun_queue(isci_remote_device,
|
||||
ccb->ccb_h.target_lun);
|
||||
|
||||
isci_log_message(1, "ISCI", "queue %p %x\n", ccb,
|
||||
ccb->csio.cdb_io.cdb_bytes[0]);
|
||||
ccb->ccb_h.status |= CAM_SIM_QUEUED;
|
||||
TAILQ_INSERT_TAIL(&isci_remote_device->queued_ccbs,
|
||||
&ccb->ccb_h, sim_links.tqe);
|
||||
if (ccb->ccb_h.status & CAM_SIM_QUEUED) {
|
||||
|
||||
KASSERT(ccb == isci_remote_device->queued_ccb_in_progress,
|
||||
("multiple internally queued ccbs in flight"));
|
||||
|
||||
/*
|
||||
* Do nothing, CCB is already on the device's queue.
|
||||
* We leave it on the queue, to be retried again
|
||||
* next time a CCB on this device completes, or we
|
||||
* get a ready notification for this device.
|
||||
*/
|
||||
isci_log_message(1, "ISCI", "already queued %p %x\n",
|
||||
ccb, ccb->csio.cdb_io.cdb_bytes[0]);
|
||||
|
||||
isci_remote_device->queued_ccb_in_progress = NULL;
|
||||
|
||||
} else {
|
||||
isci_log_message(1, "ISCI", "queue %p %x\n", ccb,
|
||||
ccb->csio.cdb_io.cdb_bytes[0]);
|
||||
ccb->ccb_h.status |= CAM_SIM_QUEUED;
|
||||
|
||||
TAILQ_INSERT_TAIL(&isci_remote_device->queued_ccbs,
|
||||
&ccb->ccb_h, sim_links.tqe);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -297,14 +297,18 @@ isci_remote_device_release_device_queue(
|
||||
for (lun = 0; lun < ISCI_MAX_LUN; lun++)
|
||||
isci_remote_device_release_lun_queue(device, lun);
|
||||
} else {
|
||||
struct ccb_hdr *ccb_h;
|
||||
/*
|
||||
* We cannot unfreeze the devq, because there are still
|
||||
* CCBs in our internal queue that need to be processed
|
||||
* first. Mark this device, and the controller, so that
|
||||
* the first CCB in this device's internal queue will be
|
||||
* resubmitted after the current completion context
|
||||
* unwinds.
|
||||
*/
|
||||
device->release_queued_ccb = TRUE;
|
||||
device->domain->controller->release_queued_ccbs = TRUE;
|
||||
|
||||
ccb_h = TAILQ_FIRST(&device->queued_ccbs);
|
||||
TAILQ_REMOVE(&device->queued_ccbs, ccb_h, sim_links.tqe);
|
||||
ccb_h->status &= ~CAM_SIM_QUEUED;
|
||||
isci_log_message(1, "ISCI", "release %p %x\n", ccb_h,
|
||||
((union ccb*)ccb_h)->csio.cdb_io.cdb_bytes[0]);
|
||||
isci_io_request_execute_scsi_io((union ccb *)ccb_h,
|
||||
device->domain->controller);
|
||||
isci_log_message(1, "ISCI", "schedule %p for release\n",
|
||||
device);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user