From cb5b7c13042a34fdb03fd64603b929c49f14b4b6 Mon Sep 17 00:00:00 2001 From: Jim Harris Date: Tue, 26 Mar 2013 21:14:51 +0000 Subject: [PATCH] Cap the number of retry attempts to a configurable number. This ensures that if a specific I/O repeatedly times out, we don't retry it indefinitely. The default number of retries is 4, but it can be adjusted using the hw.nvme.retry_count tunable. Sponsored by: Intel Reviewed by: carl --- sys/dev/nvme/nvme.c | 3 ++- sys/dev/nvme/nvme_ctrlr.c | 7 +++++++ sys/dev/nvme/nvme_private.h | 7 ++++++- sys/dev/nvme/nvme_qpair.c | 34 ++++++++++++++++++++++++---------- 4 files changed, 39 insertions(+), 12 deletions(-) diff --git a/sys/dev/nvme/nvme.c b/sys/dev/nvme/nvme.c index 2a2a8e200f19..cd2a7a6c6247 100644 --- a/sys/dev/nvme/nvme.c +++ b/sys/dev/nvme/nvme.c @@ -49,7 +49,8 @@ struct nvme_consumer { struct nvme_consumer nvme_consumer[NVME_MAX_CONSUMERS]; #define INVALID_CONSUMER_ID 0xFFFF -uma_zone_t nvme_request_zone; +uma_zone_t nvme_request_zone; +int32_t nvme_retry_count; MALLOC_DEFINE(M_NVME, "nvme", "nvme(4) memory allocations"); diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c index cfca699012b1..c4ec259918e1 100644 --- a/sys/dev/nvme/nvme_ctrlr.c +++ b/sys/dev/nvme/nvme_ctrlr.c @@ -731,6 +731,10 @@ nvme_ctrlr_start(void *ctrlr_arg) struct nvme_controller *ctrlr = ctrlr_arg; int i; + nvme_qpair_reset(&ctrlr->adminq); + for (i = 0; i < ctrlr->num_io_queues; i++) + nvme_qpair_reset(&ctrlr->ioq[i]); + nvme_admin_qpair_enable(&ctrlr->adminq); if (nvme_ctrlr_identify(ctrlr) != 0) @@ -929,6 +933,9 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev) timeout_period = max(timeout_period, NVME_MIN_TIMEOUT_PERIOD); ctrlr->timeout_period = timeout_period; + nvme_retry_count = NVME_DEFAULT_RETRY_COUNT; + TUNABLE_INT_FETCH("hw.nvme.retry_count", &nvme_retry_count); + per_cpu_io_queues = 1; TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues); ctrlr->per_cpu_io_queues = per_cpu_io_queues ? 
TRUE : FALSE; diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h index 091d13caa79b..dd36a86473ba 100644 --- a/sys/dev/nvme/nvme_private.h +++ b/sys/dev/nvme/nvme_private.h @@ -104,6 +104,8 @@ MALLOC_DECLARE(M_NVME); #define NVME_MIN_TIMEOUT_PERIOD (5) #define NVME_MAX_TIMEOUT_PERIOD (120) +#define NVME_DEFAULT_RETRY_COUNT (4) + /* Maximum log page size to fetch for AERs. */ #define NVME_MAX_AER_LOG_SIZE (4096) @@ -111,7 +113,8 @@ MALLOC_DECLARE(M_NVME); #define CACHE_LINE_SIZE (64) #endif -extern uma_zone_t nvme_request_zone; +extern uma_zone_t nvme_request_zone; +extern int32_t nvme_retry_count; struct nvme_request { @@ -122,6 +125,7 @@ struct nvme_request { struct uio *uio; nvme_cb_fn_t cb_fn; void *cb_arg; + int32_t retries; STAILQ_ENTRY(nvme_request) stailq; }; @@ -409,6 +413,7 @@ void nvme_qpair_submit_tracker(struct nvme_qpair *qpair, void nvme_qpair_process_completions(struct nvme_qpair *qpair); void nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req); +void nvme_qpair_reset(struct nvme_qpair *qpair); void nvme_admin_qpair_enable(struct nvme_qpair *qpair); void nvme_admin_qpair_disable(struct nvme_qpair *qpair); diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c index 3841095797d1..69842b33cffb 100644 --- a/sys/dev/nvme/nvme_qpair.c +++ b/sys/dev/nvme/nvme_qpair.c @@ -105,7 +105,8 @@ nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr, req = tr->req; error = nvme_completion_is_error(cpl); - retry = error && nvme_completion_is_retry(cpl); + retry = error && nvme_completion_is_retry(cpl) && + req->retries < nvme_retry_count; if (error && print_on_error) { nvme_dump_completion(cpl); @@ -122,9 +123,10 @@ nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr, mtx_lock(&qpair->lock); callout_stop(&tr->timer); - if (retry) + if (retry) { + req->retries++; nvme_qpair_submit_tracker(qpair, tr); - else { + } else { if (req->payload_size > 0 || req->uio != 
NULL) bus_dmamap_unload(qpair->dma_tag, tr->payload_dma_map); @@ -568,6 +570,12 @@ nvme_qpair_enable(struct nvme_qpair *qpair) { qpair->is_enabled = TRUE; +} + +void +nvme_qpair_reset(struct nvme_qpair *qpair) +{ + qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0; /* @@ -597,19 +605,25 @@ nvme_io_qpair_enable(struct nvme_qpair *qpair) { STAILQ_HEAD(, nvme_request) temp; struct nvme_tracker *tr; + struct nvme_tracker *tr_temp; struct nvme_request *req; + /* + * Manually abort each outstanding I/O. This normally results in a + * retry, unless the retry count on the associated request has + * reached its limit. + */ + TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) { + device_printf(qpair->ctrlr->dev, + "aborting outstanding i/o\n"); + nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC, + NVME_SC_ABORTED_BY_REQUEST, TRUE); + } + mtx_lock(&qpair->lock); nvme_qpair_enable(qpair); - TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq) { - device_printf(qpair->ctrlr->dev, - "resubmitting outstanding i/o\n"); - nvme_dump_command(&tr->req->cmd); - nvme_qpair_submit_tracker(qpair, tr); - } - STAILQ_INIT(&temp); STAILQ_SWAP(&qpair->queued_req, &temp, nvme_request);