From c37fc318c480325d211fd20f79b05ffb6edd42b5 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Fri, 19 Jul 2019 18:39:18 +0000 Subject: [PATCH] Keep track of the number of retried commands. Retried commands can indicate a performance degradation of an nvme drive. Keep track of the number of retries and report it out via sysctl, just like number of commands and interrupts. --- sys/dev/nvme/nvme_private.h | 1 + sys/dev/nvme/nvme_qpair.c | 3 +++ sys/dev/nvme/nvme_sysctl.c | 23 +++++++++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h index 6b363be018c8..e7e923f2e0bc 100644 --- a/sys/dev/nvme/nvme_private.h +++ b/sys/dev/nvme/nvme_private.h @@ -190,6 +190,7 @@ struct nvme_qpair { int64_t num_cmds; int64_t num_intr_handler_calls; + int64_t num_retries; struct nvme_command *cmd; struct nvme_completion *cpl; diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c index 098b6d63f532..7c28f04213a7 100644 --- a/sys/dev/nvme/nvme_qpair.c +++ b/sys/dev/nvme/nvme_qpair.c @@ -393,6 +393,8 @@ nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr, error = nvme_completion_is_error(cpl); retry = error && nvme_completion_is_retry(cpl) && req->retries < nvme_retry_count; + if (retry) + qpair->num_retries++; if (error && (print_on_error == ERROR_PRINT_ALL || (!retry && print_on_error == ERROR_PRINT_NO_RETRY))) { @@ -684,6 +686,7 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id, qpair->num_cmds = 0; qpair->num_intr_handler_calls = 0; + qpair->num_retries = 0; qpair->cmd = (struct nvme_command *)queuemem; qpair->cpl = (struct nvme_completion *)(queuemem + cmdsz); prpmem = (uint8_t *)(queuemem + cmdsz + cplsz); diff --git a/sys/dev/nvme/nvme_sysctl.c b/sys/dev/nvme/nvme_sysctl.c index 85c28435e2af..b09fa20dd1ee 100644 --- a/sys/dev/nvme/nvme_sysctl.c +++ b/sys/dev/nvme/nvme_sysctl.c @@ -166,6 +166,7 @@ nvme_qpair_reset_stats(struct nvme_qpair *qpair) qpair->num_cmds = 0; 
qpair->num_intr_handler_calls = 0; + qpair->num_retries = 0; } static int @@ -198,6 +199,21 @@ nvme_sysctl_num_intr_handler_calls(SYSCTL_HANDLER_ARGS) return (sysctl_handle_64(oidp, &num_intr_handler_calls, 0, req)); } +static int +nvme_sysctl_num_retries(SYSCTL_HANDLER_ARGS) +{ + struct nvme_controller *ctrlr = arg1; + int64_t num_retries = 0; + int i; + + num_retries = ctrlr->adminq.num_retries; + + for (i = 0; i < ctrlr->num_io_queues; i++) + num_retries += ctrlr->ioq[i].num_retries; + + return (sysctl_handle_64(oidp, &num_retries, 0, req)); +} + static int nvme_sysctl_reset_stats(SYSCTL_HANDLER_ARGS) { @@ -249,6 +265,8 @@ nvme_sysctl_initialize_queue(struct nvme_qpair *qpair, "Number of times interrupt handler was invoked (will typically be " "less than number of actual interrupts generated due to " "coalescing)"); + SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_retries", + CTLFLAG_RD, &qpair->num_retries, "Number of commands retried"); SYSCTL_ADD_PROC(ctrlr_ctx, que_list, OID_AUTO, "dump_debug", CTLTYPE_UINT | CTLFLAG_RW, qpair, 0, @@ -300,6 +318,11 @@ nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr) "typically be less than number of actual interrupts " "generated due to coalescing)"); + SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, + "num_retries", CTLTYPE_S64 | CTLFLAG_RD, + ctrlr, 0, nvme_sysctl_num_retries, "IU", + "Number of commands retried"); + SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, "reset_stats", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0, nvme_sysctl_reset_stats, "IU", "Reset statistics to zero");