Make nvme_ctrlr_reset a nop if a reset is already in progress.
This protects against cases where a controller crashes with multiple I/O outstanding, each timing out and requesting controller resets simultaneously. While here, remove a debugging printf from a previous commit, and add more logging around I/O that needs to be resubmitted after a controller reset.
Sponsored by: Intel
Reviewed by: carl
This commit is contained in:
parent
48ce317898
commit
f37c22a3bd
@ -422,6 +422,13 @@ nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr)
|
||||
/*
 * Request a reset of the given controller.
 *
 * The ctrlr->is_resetting flag is used as a guard: the request is only
 * forwarded to the controller's taskqueue when the flag could be switched
 * from 0 to 1.  If the flag was already set, the call returns without
 * doing anything, so multiple callers asking for a reset at the same time
 * result in a single queued reset task.
 */
void
|
||||
nvme_ctrlr_reset(struct nvme_controller *ctrlr)
|
||||
{
|
||||
int cmpset;
|
||||
|
||||
/* Atomically change is_resetting from 0 to 1; cmpset records the outcome. */
cmpset = atomic_cmpset_32(&ctrlr->is_resetting, 0, 1);
|
||||
|
||||
if (cmpset == 0)
|
||||
/*
 * Controller is already resetting: the compare-and-set did not
 * take effect, so drop this request instead of queueing another
 * reset.
 */
|
||||
return;
|
||||
|
||||
/*
 * Hand the reset off to the controller's taskqueue.  The flag is
 * switched back from 1 to 0 at the end of nvme_ctrlr_reset_task().
 */
taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->reset_task);
|
||||
}
|
||||
@ -700,6 +707,8 @@ nvme_ctrlr_reset_task(void *arg, int pending)
|
||||
pause("nvmereset", hz / 10);
|
||||
if (status == 0)
|
||||
nvme_ctrlr_start(ctrlr);
|
||||
|
||||
atomic_cmpset_32(&ctrlr->is_resetting, 1, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -896,6 +905,8 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
|
||||
taskqueue_thread_enqueue, &ctrlr->taskqueue);
|
||||
taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_DISK, "nvme taskq");
|
||||
|
||||
ctrlr->is_resetting = 0;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
@ -100,7 +100,7 @@ MALLOC_DECLARE(M_NVME);
|
||||
#define NVME_MAX_CONSUMERS (2)
|
||||
#define NVME_MAX_ASYNC_EVENTS (8)
|
||||
|
||||
#define NVME_DEFAULT_TIMEOUT_PERIOD (30) /* in seconds */
|
||||
#define NVME_DEFAULT_TIMEOUT_PERIOD (30) /* in seconds */
|
||||
#define NVME_MIN_TIMEOUT_PERIOD (5)
|
||||
#define NVME_MAX_TIMEOUT_PERIOD (120)
|
||||
|
||||
@ -280,6 +280,8 @@ struct nvme_controller {
|
||||
|
||||
void *cons_cookie[NVME_MAX_CONSUMERS];
|
||||
|
||||
uint32_t is_resetting;
|
||||
|
||||
#ifdef CHATHAM2
|
||||
uint64_t chatham_size;
|
||||
uint64_t chatham_lbas;
|
||||
|
@ -142,7 +142,13 @@ nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
|
||||
TAILQ_REMOVE(&qpair->outstanding_tr, tr, tailq);
|
||||
TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq);
|
||||
|
||||
if (!STAILQ_EMPTY(&qpair->queued_req)) {
|
||||
/*
|
||||
* If the controller is in the middle of resetting, don't
|
||||
* try to submit queued requests here - let the reset logic
|
||||
* handle that instead.
|
||||
*/
|
||||
if (!STAILQ_EMPTY(&qpair->queued_req) &&
|
||||
!qpair->ctrlr->is_resetting) {
|
||||
req = STAILQ_FIRST(&qpair->queued_req);
|
||||
STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
|
||||
_nvme_qpair_submit_request(qpair, req);
|
||||
@ -462,8 +468,6 @@ nvme_timeout(void *arg)
|
||||
|
||||
/* Read csts to get value of cfs - controller fatal status. */
|
||||
csts.raw = nvme_mmio_read_4(ctrlr, csts);
|
||||
device_printf(ctrlr->dev, "i/o timeout, csts.cfs=%d\n", csts.bits.cfs);
|
||||
nvme_dump_command(&tr->req->cmd);
|
||||
|
||||
if (ctrlr->enable_aborts && csts.bits.cfs == 0) {
|
||||
/*
|
||||
@ -606,8 +610,12 @@ nvme_io_qpair_enable(struct nvme_qpair *qpair)
|
||||
|
||||
nvme_qpair_enable(qpair);
|
||||
|
||||
TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq)
|
||||
TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq) {
|
||||
device_printf(qpair->ctrlr->dev,
|
||||
"resubmitting outstanding i/o\n");
|
||||
nvme_dump_command(&tr->req->cmd);
|
||||
nvme_qpair_submit_tracker(qpair, tr);
|
||||
}
|
||||
|
||||
STAILQ_INIT(&temp);
|
||||
STAILQ_SWAP(&qpair->queued_req, &temp, nvme_request);
|
||||
@ -615,6 +623,9 @@ nvme_io_qpair_enable(struct nvme_qpair *qpair)
|
||||
while (!STAILQ_EMPTY(&temp)) {
|
||||
req = STAILQ_FIRST(&temp);
|
||||
STAILQ_REMOVE_HEAD(&temp, stailq);
|
||||
device_printf(qpair->ctrlr->dev,
|
||||
"resubmitting queued i/o\n");
|
||||
nvme_dump_command(&req->cmd);
|
||||
_nvme_qpair_submit_request(qpair, req);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user