Make nvme_ctrlr_reset a nop if a reset is already in progress.

This protects against cases where a controller crashes with multiple
I/O outstanding, each timing out and requesting controller resets
simultaneously.

While here, remove a debugging printf from a previous commit, and add
more logging around I/O that needs to be resubmitted after a controller
reset.

Sponsored by:	Intel
Reviewed by:	carl
This commit is contained in:
Jim Harris 2013-03-26 20:56:58 +00:00
parent 48ce317898
commit f37c22a3bd
3 changed files with 29 additions and 5 deletions

View File

@ -422,6 +422,13 @@ nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr)
/*
 * Request a controller reset by scheduling the controller's reset task.
 *
 * Only the caller that atomically moves is_resetting from 0 to 1 queues
 * the task; every other caller that arrives while the flag is still set
 * returns without doing anything.  The flag is moved back to 0 at the
 * end of nvme_ctrlr_reset_task().
 */
void
nvme_ctrlr_reset(struct nvme_controller *ctrlr)
{

	/*
	 * A zero result from the compare-and-set means is_resetting was
	 * not 0, i.e. the controller is already resetting, so there is
	 * nothing to enqueue.
	 */
	if (atomic_cmpset_32(&ctrlr->is_resetting, 0, 1) != 0)
		taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->reset_task);
}
@ -700,6 +707,8 @@ nvme_ctrlr_reset_task(void *arg, int pending)
pause("nvmereset", hz / 10);
if (status == 0)
nvme_ctrlr_start(ctrlr);
atomic_cmpset_32(&ctrlr->is_resetting, 1, 0);
}
static void
@ -896,6 +905,8 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
taskqueue_thread_enqueue, &ctrlr->taskqueue);
taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_DISK, "nvme taskq");
ctrlr->is_resetting = 0;
return (0);
}

View File

@ -100,7 +100,7 @@ MALLOC_DECLARE(M_NVME);
#define NVME_MAX_CONSUMERS (2)
#define NVME_MAX_ASYNC_EVENTS (8)
#define NVME_DEFAULT_TIMEOUT_PERIOD (30) /* in seconds */
#define NVME_DEFAULT_TIMEOUT_PERIOD (30) /* in seconds */
#define NVME_MIN_TIMEOUT_PERIOD (5)
#define NVME_MAX_TIMEOUT_PERIOD (120)
@ -280,6 +280,8 @@ struct nvme_controller {
void *cons_cookie[NVME_MAX_CONSUMERS];
uint32_t is_resetting;
#ifdef CHATHAM2
uint64_t chatham_size;
uint64_t chatham_lbas;

View File

@ -142,7 +142,13 @@ nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
TAILQ_REMOVE(&qpair->outstanding_tr, tr, tailq);
TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq);
if (!STAILQ_EMPTY(&qpair->queued_req)) {
/*
* If the controller is in the middle of resetting, don't
* try to submit queued requests here - let the reset logic
* handle that instead.
*/
if (!STAILQ_EMPTY(&qpair->queued_req) &&
!qpair->ctrlr->is_resetting) {
req = STAILQ_FIRST(&qpair->queued_req);
STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
_nvme_qpair_submit_request(qpair, req);
@ -462,8 +468,6 @@ nvme_timeout(void *arg)
/* Read csts to get value of cfs - controller fatal status. */
csts.raw = nvme_mmio_read_4(ctrlr, csts);
device_printf(ctrlr->dev, "i/o timeout, csts.cfs=%d\n", csts.bits.cfs);
nvme_dump_command(&tr->req->cmd);
if (ctrlr->enable_aborts && csts.bits.cfs == 0) {
/*
@ -606,8 +610,12 @@ nvme_io_qpair_enable(struct nvme_qpair *qpair)
nvme_qpair_enable(qpair);
TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq)
TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq) {
device_printf(qpair->ctrlr->dev,
"resubmitting outstanding i/o\n");
nvme_dump_command(&tr->req->cmd);
nvme_qpair_submit_tracker(qpair, tr);
}
STAILQ_INIT(&temp);
STAILQ_SWAP(&qpair->queued_req, &temp, nvme_request);
@ -615,6 +623,9 @@ nvme_io_qpair_enable(struct nvme_qpair *qpair)
while (!STAILQ_EMPTY(&temp)) {
req = STAILQ_FIRST(&temp);
STAILQ_REMOVE_HEAD(&temp, stailq);
device_printf(qpair->ctrlr->dev,
"resubmitting queued i/o\n");
nvme_dump_command(&req->cmd);
_nvme_qpair_submit_request(qpair, req);
}