diff --git a/sbin/nvmecontrol/nvmecontrol.8 b/sbin/nvmecontrol/nvmecontrol.8
index e726cbc08927..7e26240a95ba 100644
--- a/sbin/nvmecontrol/nvmecontrol.8
+++ b/sbin/nvmecontrol/nvmecontrol.8
@@ -33,7 +33,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd September 17, 2012
+.Dd March 26, 2013
 .Dt NVMECONTROL 8
 .Os
 .Sh NAME
@@ -54,7 +54,10 @@
 .Op Fl p
 .Aq Fl s Ar size_in_bytes
 .Aq Fl t Ar time_in_sec
-.Aq device id
+.Aq namespace id
+.Nm
+.Ic reset
+.Aq controller id
 .Sh DESCRIPTION
 NVM Express (NVMe) is a storage protocol standard, for SSDs and other
 high-speed storage devices over PCI Express.
@@ -62,6 +65,7 @@ high-speed storage devices over PCI Express.
 .Dl nvmecontrol devlist
 .Pp
 Display a list of NVMe controllers and namespaces along with their device nodes.
+.Pp
 .Dl nvmecontrol identify nvme0
 .Pp
 Display a human-readable summary of the nvme0 IDENTIFY_CONTROLLER data.
@@ -76,6 +80,10 @@ Display a hexadecimal dump of the nvme0 IDENTIFY_NAMESPACE data for namespace
 Run a performance test on nvme0ns1 using 32 kernel threads for 30 seconds.
 Each thread will issue a single 512 byte read command.
 Results are printed to stdout when 30 seconds expires.
+.Pp
+.Dl nvmecontrol reset nvme0
+.Pp
+Perform a controller-level reset of the nvme0 controller.
 .Sh AUTHORS
 .An -nosplit
 .Nm
diff --git a/sbin/nvmecontrol/nvmecontrol.c b/sbin/nvmecontrol/nvmecontrol.c
index 6abd3f5d1d7a..b6b6908ea524 100644
--- a/sbin/nvmecontrol/nvmecontrol.c
+++ b/sbin/nvmecontrol/nvmecontrol.c
@@ -56,6 +56,9 @@ __FBSDID("$FreeBSD$");
 "                            <-i intr|wait> [-f refthread] [-p]\n" \
 "                            <namespace id>\n"
 
+#define RESET_USAGE \
+"       nvmecontrol reset <controller id>\n"
+
 static void perftest_usage(void);
 
 static void
@@ -64,6 +67,7 @@ usage(void)
 	fprintf(stderr, "usage:\n");
 	fprintf(stderr, DEVLIST_USAGE);
 	fprintf(stderr, IDENTIFY_USAGE);
+	fprintf(stderr, RESET_USAGE);
 	fprintf(stderr, PERFTEST_USAGE);
 	exit(EX_USAGE);
 }
@@ -580,6 +584,41 @@ perftest(int argc, char *argv[])
 	exit(EX_OK);
 }
 
+static void
+reset_ctrlr(int argc, char *argv[])
+{
+	struct stat		devstat;
+	char			path[64];
+	int			ch, fd;
+
+	while ((ch = getopt(argc, argv, "")) != -1) {
+		switch ((char)ch) {
+		default:
+			usage();
+		}
+	}
+
+	snprintf(path, sizeof(path), "/dev/%s", argv[optind]);
+
+	if (stat(path, &devstat) != 0) {
+		printf("Invalid device node '%s'.\n", path);
+		exit(EX_IOERR);
+	}
+
+	fd = open(path, O_RDWR);
+	if (fd < 0) {
+		printf("Could not open %s.\n", path);
+		exit(EX_NOPERM);
+	}
+
+	if (ioctl(fd, NVME_RESET_CONTROLLER) == -1) {
+		printf("ioctl to %s failed.\n", path);
+		exit(EX_IOERR);
+	}
+
+	exit(EX_OK);
+}
+
 int
 main(int argc, char *argv[])
 {
@@ -593,6 +632,8 @@ main(int argc, char *argv[])
 		identify(argc-1, &argv[1]);
 	else if (strcmp(argv[1], "perftest") == 0)
 		perftest(argc-1, &argv[1]);
+	else if (strcmp(argv[1], "reset") == 0)
+		reset_ctrlr(argc-1, &argv[1]);
 
 	usage();
 
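For context, everything the new command does in userland boils down to one ioctl on the controller's device node. A minimal standalone consumer would look roughly like this (a sketch, not part of the patch; error handling is trimmed and /dev/nvme0 is just an example node):

	#include <sys/param.h>
	#include <sys/ioctl.h>
	#include <fcntl.h>
	#include <dev/nvme/nvme.h>

	int
	main(void)
	{
		int fd;

		/* Open the controller node, not a namespace node. */
		fd = open("/dev/nvme0", O_RDWR);
		if (fd < 0)
			return (1);

		/* NVME_RESET_CONTROLLER is _IO('n', 5): no argument, no data. */
		return (ioctl(fd, NVME_RESET_CONTROLLER) == -1 ? 1 : 0);
	}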
diff --git a/sys/dev/nvme/nvme.c b/sys/dev/nvme/nvme.c
index b5e010ef4399..7630a1db4ca5 100644
--- a/sys/dev/nvme/nvme.c
+++ b/sys/dev/nvme/nvme.c
@@ -255,7 +255,7 @@ nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
 		}
 	}
 
-	nvme_qpair_submit_cmd(tr->qpair, tr);
+	nvme_qpair_submit_tracker(tr->qpair, tr);
 }
 
 static int
@@ -274,11 +274,11 @@ nvme_attach(device_t dev)
 	 *  to cc.en==0.  This is because we don't really know what status
 	 *  the controller was left in when boot handed off to OS.
 	 */
-	status = nvme_ctrlr_reset(ctrlr);
+	status = nvme_ctrlr_hw_reset(ctrlr);
 	if (status != 0)
 		return (status);
 
-	status = nvme_ctrlr_reset(ctrlr);
+	status = nvme_ctrlr_hw_reset(ctrlr);
 	if (status != 0)
 		return (status);
 
diff --git a/sys/dev/nvme/nvme.h b/sys/dev/nvme/nvme.h
index a9d214e68f0a..654294a00ed5 100644
--- a/sys/dev/nvme/nvme.h
+++ b/sys/dev/nvme/nvme.h
@@ -37,6 +37,7 @@
 #define NVME_IDENTIFY_NAMESPACE		_IOR('n', 1, struct nvme_namespace_data)
 #define NVME_IO_TEST			_IOWR('n', 2, struct nvme_io_test)
 #define NVME_BIO_TEST			_IOWR('n', 4, struct nvme_io_test)
+#define NVME_RESET_CONTROLLER		_IO('n', 5)
 
 /*
  * Use to mark a command to apply to all namespaces, or to retrieve global
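Pieced together from the nvme_ctrlr.c and nvme_qpair.c hunks that follow, the kernel-side flow behind that ioctl is (summary sketch, not literal patch code):

	/*
	 * NVME_RESET_CONTROLLER ioctl
	 *   -> nvme_ctrlr_reset(ctrlr)
	 *        nvme_ctrlr_hw_reset(ctrlr)
	 *          nvme_admin_qpair_disable() / nvme_io_qpair_disable()  quiesce queues
	 *          DELAY(100*1000)                                       100ms settle time
	 *          nvme_ctrlr_disable() -> nvme_ctrlr_enable()           toggle cc.en
	 *        nvme_ctrlr_start(ctrlr)                                 re-identify, re-create
	 *                                                                 queues, re-enable qpairs,
	 *                                                                 resubmit pending I/O
	 */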
diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index 668ac7f04fbe..4a4956d9fa13 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -405,13 +405,31 @@ nvme_ctrlr_enable(struct nvme_controller *ctrlr)
 }
 
 int
-nvme_ctrlr_reset(struct nvme_controller *ctrlr)
+nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr)
 {
+	int i;
+
+	nvme_admin_qpair_disable(&ctrlr->adminq);
+	for (i = 0; i < ctrlr->num_io_queues; i++)
+		nvme_io_qpair_disable(&ctrlr->ioq[i]);
+
+	DELAY(100*1000);
 
 	nvme_ctrlr_disable(ctrlr);
 	return (nvme_ctrlr_enable(ctrlr));
 }
 
+void
+nvme_ctrlr_reset(struct nvme_controller *ctrlr)
+{
+	int status;
+
+	status = nvme_ctrlr_hw_reset(ctrlr);
+	DELAY(100*1000);
+	if (status == 0)
+		nvme_ctrlr_start(ctrlr);
+}
+
 static int
 nvme_ctrlr_identify(struct nvme_controller *ctrlr)
 {
@@ -626,6 +644,9 @@ void
 nvme_ctrlr_start(void *ctrlr_arg)
 {
 	struct nvme_controller *ctrlr = ctrlr_arg;
+	int i;
+
+	nvme_admin_qpair_enable(&ctrlr->adminq);
 
 	if (nvme_ctrlr_identify(ctrlr) != 0)
 		goto err;
@@ -642,16 +663,26 @@ nvme_ctrlr_start(void *ctrlr_arg)
 
 	nvme_ctrlr_configure_aer(ctrlr);
 	nvme_ctrlr_configure_int_coalescing(ctrlr);
 
+	for (i = 0; i < ctrlr->num_io_queues; i++)
+		nvme_io_qpair_enable(&ctrlr->ioq[i]);
+
 	ctrlr->is_started = TRUE;
 
 err:
-	/*
-	 * Initialize sysctls, even if controller failed to start, to
-	 *  assist with debugging admin queue pair.
-	 */
-	nvme_sysctl_initialize_ctrlr(ctrlr);
-	config_intrhook_disestablish(&ctrlr->config_hook);
+	if (ctrlr->num_start_attempts == 0) {
+		/*
+		 * Initialize sysctls, even if controller failed to start, to
+		 *  assist with debugging admin queue pair.  Only run this
+		 *  code on the initial start attempt though, and not
+		 *  subsequent start attempts due to controller-level resets.
+		 */
+		nvme_sysctl_initialize_ctrlr(ctrlr);
+		config_intrhook_disestablish(&ctrlr->config_hook);
+	}
+
+	ctrlr->num_start_attempts++;
 }
 
 static void
@@ -730,6 +761,9 @@ nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
 			return (ENXIO);
 		memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata));
 		break;
+	case NVME_RESET_CONTROLLER:
+		nvme_ctrlr_reset(ctrlr);
+		break;
 	default:
 		return (ENOTTY);
 	}
@@ -752,6 +786,7 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
 
 	ctrlr->dev = dev;
 	ctrlr->is_started = FALSE;
+	ctrlr->num_start_attempts = 0;
 
 	status = nvme_ctrlr_allocate_bar(ctrlr);
 
@@ -835,14 +870,10 @@ intx:
 void
 nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev)
 {
-	struct nvme_namespace	*ns;
 	int			i;
 
-	for (i = 0; i < NVME_MAX_NAMESPACES; i++) {
-		ns = &ctrlr->ns[i];
-		if (ns->cdev)
-			destroy_dev(ns->cdev);
-	}
+	for (i = 0; i < NVME_MAX_NAMESPACES; i++)
+		nvme_ns_destruct(&ctrlr->ns[i]);
 
 	if (ctrlr->cdev)
 		destroy_dev(ctrlr->cdev);
@@ -853,13 +884,6 @@ nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev)
 
 	free(ctrlr->ioq, M_NVME);
 
-	/* Manually abort outstanding async event requests. */
-	for (i = 0; i < ctrlr->num_aers; i++) {
-		nvme_qpair_manual_abort_request(&ctrlr->adminq,
-		    ctrlr->aer[i].req, NVME_SCT_GENERIC,
-		    NVME_SC_ABORTED_SQ_DELETION, FALSE);
-	}
-
 	nvme_admin_qpair_destroy(&ctrlr->adminq);
 
 	if (ctrlr->resource != NULL) {
diff --git a/sys/dev/nvme/nvme_ns.c b/sys/dev/nvme/nvme_ns.c
index a7d7c6f48339..27ff50496df0 100644
--- a/sys/dev/nvme/nvme_ns.c
+++ b/sys/dev/nvme/nvme_ns.c
@@ -345,6 +345,13 @@ nvme_ns_construct(struct nvme_namespace *ns, uint16_t id,
 	if (ctrlr->cdata.vwc.present)
 		ns->flags |= NVME_NS_FLUSH_SUPPORTED;
 
+	/*
+	 * cdev may have already been created, if we are reconstructing the
+	 *  namespace after a controller-level reset.
+	 */
+	if (ns->cdev != NULL)
+		return (0);
+
 	/*
 	 * MAKEDEV_ETERNAL was added in r210923, for cdevs that will never
 	 *  be destroyed.  This avoids refcounting on the cdev object.
@@ -361,9 +368,15 @@ nvme_ns_construct(struct nvme_namespace *ns, uint16_t id,
 	    device_get_unit(ctrlr->dev), ns->id);
 #endif
 
-	if (ns->cdev) {
+	if (ns->cdev != NULL)
 		ns->cdev->si_drv1 = ns;
-	}
 
 	return (0);
 }
+
+void nvme_ns_destruct(struct nvme_namespace *ns)
+{
+
+	if (ns->cdev != NULL)
+		destroy_dev(ns->cdev);
+}
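The namespace lifecycle across a controller-level reset, assembled from the two files above (a sketch for orientation, not literal patch code):

	/*
	 * attach:  nvme_ns_construct()       creates /dev/nvmeXnsY (MAKEDEV_ETERNAL)
	 * reset:   nvme_ctrlr_start()
	 *            -> nvme_ns_construct()  re-reads the namespace data, sees
	 *                                    ns->cdev != NULL and returns early,
	 *                                    so the device node survives the reset
	 * detach:  nvme_ns_destruct()        destroy_dev(ns->cdev)
	 */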
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
index 4876bd5ad6ad..695ce5ef00ea 100644
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -180,6 +180,8 @@ struct nvme_qpair {
 
 	struct nvme_tracker	**act_tr;
 
+	boolean_t		is_enabled;
+
 	struct mtx		lock __aligned(CACHE_LINE_SIZE);
 
 } __aligned(CACHE_LINE_SIZE);
@@ -233,6 +235,7 @@ struct nvme_controller {
 	struct intr_config_hook	config_hook;
 	uint32_t		ns_identified;
 	uint32_t		queues_created;
+	uint32_t		num_start_attempts;
 
 	/* For shared legacy interrupt. */
 	int			rid;
@@ -361,7 +364,8 @@ void	nvme_payload_map_uio(void *arg, bus_dma_segment_t *seg, int nseg,
 
 int	nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev);
 void	nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev);
-int	nvme_ctrlr_reset(struct nvme_controller *ctrlr);
+int	nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr);
+void	nvme_ctrlr_reset(struct nvme_controller *ctrlr);
 /* ctrlr defined as void * to allow use with config_intrhook. */
 void	nvme_ctrlr_start(void *ctrlr_arg);
 void	nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr,
@@ -373,21 +377,23 @@ void	nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
 			     uint16_t vector, uint32_t num_entries,
 			     uint32_t num_trackers, uint32_t max_xfer_size,
 			     struct nvme_controller *ctrlr);
-void	nvme_qpair_submit_cmd(struct nvme_qpair *qpair,
-			      struct nvme_tracker *tr);
+void	nvme_qpair_submit_tracker(struct nvme_qpair *qpair,
+				  struct nvme_tracker *tr);
 void	nvme_qpair_process_completions(struct nvme_qpair *qpair);
 void	nvme_qpair_submit_request(struct nvme_qpair *qpair,
 				  struct nvme_request *req);
-void	nvme_qpair_manual_abort_request(struct nvme_qpair *qpair,
-					struct nvme_request *req, uint32_t sct,
-					uint32_t sc, boolean_t print_on_error);
 
+void	nvme_admin_qpair_enable(struct nvme_qpair *qpair);
+void	nvme_admin_qpair_disable(struct nvme_qpair *qpair);
 void	nvme_admin_qpair_destroy(struct nvme_qpair *qpair);
 
+void	nvme_io_qpair_enable(struct nvme_qpair *qpair);
+void	nvme_io_qpair_disable(struct nvme_qpair *qpair);
 void	nvme_io_qpair_destroy(struct nvme_qpair *qpair);
 
 int	nvme_ns_construct(struct nvme_namespace *ns, uint16_t id,
 			  struct nvme_controller *ctrlr);
+void	nvme_ns_destruct(struct nvme_namespace *ns);
 
 int	nvme_ns_physio(struct cdev *dev, struct uio *uio, int ioflag);
 
diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c
index 25b1a89a5b87..f98125f7b3ec 100644
--- a/sys/dev/nvme/nvme_qpair.c
+++ b/sys/dev/nvme/nvme_qpair.c
@@ -87,23 +87,6 @@ nvme_completion_is_retry(const struct nvme_completion *cpl)
 	}
 }
 
-static struct nvme_tracker *
-nvme_qpair_find_tracker(struct nvme_qpair *qpair, struct nvme_request *req)
-{
-	struct nvme_tracker	*tr;
-	uint32_t		i;
-
-	KASSERT(req != NULL, ("%s: called with NULL req\n", __func__));
-
-	for (i = 0; i < qpair->num_entries; ++i) {
-		tr = qpair->act_tr[i];
-		if (tr != NULL && tr->req == req)
-			return (tr);
-	}
-
-	return (NULL);
-}
-
 static void
 nvme_qpair_construct_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
     uint16_t cid)
@@ -147,7 +130,7 @@ nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
 	callout_stop(&tr->timer);
 
 	if (retry)
-		nvme_qpair_submit_cmd(qpair, tr);
+		nvme_qpair_submit_tracker(qpair, tr);
 	else {
 		if (req->payload_size > 0 || req->uio != NULL)
 			bus_dmamap_unload(qpair->dma_tag,
@@ -169,6 +152,21 @@ nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
 	mtx_unlock(&qpair->lock);
 }
 
+static void
+nvme_qpair_manual_complete_tracker(struct nvme_qpair *qpair,
+    struct nvme_tracker *tr, uint32_t sct, uint32_t sc,
+    boolean_t print_on_error)
+{
+	struct nvme_completion	cpl;
+
+	memset(&cpl, 0, sizeof(cpl));
+	cpl.sqid = qpair->id;
+	cpl.cid = tr->cid;
+	cpl.sf_sct = sct;
+	cpl.sf_sc = sc;
+	nvme_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
+}
+
 void
 nvme_qpair_process_completions(struct nvme_qpair *qpair)
 {
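The helper above funnels software-initiated aborts through the same path as real hardware completions; the typical call, fabricating an ABORTED_SQ_DELETION status for an outstanding AER, appears verbatim in nvme_admin_qpair_abort_aers() later in this file:

	nvme_qpair_manual_complete_tracker(qpair, tr,
	    NVME_SCT_GENERIC, NVME_SC_ABORTED_SQ_DELETION, FALSE);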
@@ -177,6 +175,15 @@ nvme_qpair_process_completions(struct nvme_qpair *qpair)
 
 	qpair->num_intr_handler_calls++;
 
+	if (!qpair->is_enabled)
+		/*
+		 * qpair is not enabled, likely because a controller reset is
+		 *  in progress.  Ignore the interrupt - any I/O that was
+		 *  associated with this interrupt will get retried when the
+		 *  reset is complete.
+		 */
+		return;
+
 	while (1) {
 		cpl = &qpair->cpl[qpair->cq_head];
 
@@ -236,15 +243,6 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
 	qpair->max_xfer_size = max_xfer_size;
 	qpair->ctrlr = ctrlr;
 
-	/*
-	 * First time through the completion queue, HW will set phase
-	 *  bit on completions to 1.  So set this to 1 here, indicating
-	 *  we're looking for a 1 to know which entries have completed.
-	 *  we'll toggle the bit each time when the completion queue
-	 *  rolls over.
-	 */
-	qpair->phase = 1;
-
 	if (ctrlr->msix_enabled) {
 
 		/*
@@ -271,7 +269,6 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
 
 	qpair->num_cmds = 0;
 	qpair->num_intr_handler_calls = 0;
-	qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;
 
 	/* TODO: error checking on contigmalloc, bus_dmamap_load calls */
 	qpair->cmd = contigmalloc(qpair->num_entries *
@@ -341,10 +338,30 @@ nvme_qpair_destroy(struct nvme_qpair *qpair)
 	}
 }
 
+static void
+nvme_admin_qpair_abort_aers(struct nvme_qpair *qpair)
+{
+	struct nvme_tracker	*tr;
+
+	tr = TAILQ_FIRST(&qpair->outstanding_tr);
+	while (tr != NULL) {
+		if (tr->req->cmd.opc == NVME_OPC_ASYNC_EVENT_REQUEST) {
+			nvme_qpair_manual_complete_tracker(qpair, tr,
+			    NVME_SCT_GENERIC, NVME_SC_ABORTED_SQ_DELETION,
+			    FALSE);
+			tr = TAILQ_FIRST(&qpair->outstanding_tr);
+		} else {
+			tr = TAILQ_NEXT(tr, tailq);
+		}
+	}
+}
+
 void
 nvme_admin_qpair_destroy(struct nvme_qpair *qpair)
 {
+
+	nvme_admin_qpair_abort_aers(qpair);
+
 	/*
 	 * For NVMe, you don't send delete queue commands for the admin
 	 *  queue, so we just need to unload and free the cmd and cpl memory.
@@ -412,39 +429,6 @@ nvme_io_qpair_destroy(struct nvme_qpair *qpair)
 	}
 }
 
-static void
-nvme_qpair_manual_abort_tracker(struct nvme_qpair *qpair,
-    struct nvme_tracker *tr, uint32_t sct, uint32_t sc,
-    boolean_t print_on_error)
-{
-	struct nvme_completion	cpl;
-
-	memset(&cpl, 0, sizeof(cpl));
-	cpl.sqid = qpair->id;
-	cpl.cid = tr->cid;
-	cpl.sf_sct = sct;
-	cpl.sf_sc = sc;
-	nvme_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
-}
-
-void
-nvme_qpair_manual_abort_request(struct nvme_qpair *qpair,
-    struct nvme_request *req, uint32_t sct, uint32_t sc,
-    boolean_t print_on_error)
-{
-	struct nvme_tracker	*tr;
-
-	tr = nvme_qpair_find_tracker(qpair, req);
-
-	if (tr == NULL) {
-		printf("%s: request not found\n", __func__);
-		nvme_dump_command(&req->cmd);
-		return;
-	}
-
-	nvme_qpair_manual_abort_tracker(qpair, tr, sct, sc, print_on_error);
-}
-
 static void
 nvme_abort_complete(void *arg, const struct nvme_completion *status)
 {
@@ -463,7 +447,7 @@ nvme_abort_complete(void *arg, const struct nvme_completion *status)
 		 *  status, and then complete the I/O's tracker manually.
 		 */
 		printf("abort command failed, aborting command manually\n");
-		nvme_qpair_manual_abort_tracker(tr->qpair, tr,
+		nvme_qpair_manual_complete_tracker(tr->qpair, tr,
 		    NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, TRUE);
 	}
 }
@@ -478,10 +462,12 @@ nvme_timeout(void *arg)
 }
 
 void
-nvme_qpair_submit_cmd(struct nvme_qpair *qpair, struct nvme_tracker *tr)
+nvme_qpair_submit_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr)
 {
 	struct nvme_request	*req;
 
+	mtx_assert(&qpair->lock, MA_OWNED);
+
 	req = tr->req;
 	req->cmd.cid = tr->cid;
 	qpair->act_tr[tr->cid] = tr;
@@ -517,11 +503,14 @@ _nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
 
 	tr = TAILQ_FIRST(&qpair->free_tr);
 
-	if (tr == NULL) {
+	if (tr == NULL || !qpair->is_enabled) {
 		/*
-		 * No tracker is available.  Put the request on the qpair's
-		 *  request queue to be processed when a tracker frees up
-		 *  via a command completion.
+		 * No tracker is available, or the qpair is disabled due to
+		 *  an in-progress controller-level reset.
+		 *
+		 * Put the request on the qpair's request queue to be processed
+		 *  when a tracker frees up via a command completion or when
+		 *  the controller reset is completed.
 		 */
 		STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
 		return;
@@ -540,7 +529,7 @@ _nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
 			if (err != 0)
 				panic("bus_dmamap_load returned non-zero!\n");
 		} else
-			nvme_qpair_submit_cmd(tr->qpair, tr);
+			nvme_qpair_submit_tracker(tr->qpair, tr);
 	} else {
 		err = bus_dmamap_load_uio(tr->qpair->dma_tag,
 		    tr->payload_dma_map, req->uio,
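A note on the enable path added below: _nvme_qpair_submit_request() can put a request straight back onto queued_req when no tracker is free, so nvme_io_qpair_enable() first swaps the whole list into a local head and drains that instead; otherwise the resubmission loop could chase its own tail. The pattern in isolation (sketch):

	STAILQ_HEAD(, nvme_request)	temp;

	STAILQ_INIT(&temp);
	STAILQ_SWAP(&qpair->queued_req, &temp, nvme_request);
	while (!STAILQ_EMPTY(&temp)) {
		req = STAILQ_FIRST(&temp);
		STAILQ_REMOVE_HEAD(&temp, stailq);
		/* May re-queue the request onto qpair->queued_req. */
		_nvme_qpair_submit_request(qpair, req);
	}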
@@ -558,3 +547,85 @@ nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
 	_nvme_qpair_submit_request(qpair, req);
 	mtx_unlock(&qpair->lock);
 }
+
+static void
+nvme_qpair_enable(struct nvme_qpair *qpair)
+{
+
+	qpair->is_enabled = TRUE;
+	qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;
+
+	/*
+	 * First time through the completion queue, HW will set phase
+	 *  bit on completions to 1.  So set this to 1 here, indicating
+	 *  we're looking for a 1 to know which entries have completed.
+	 *  We'll toggle the bit each time the completion queue
+	 *  rolls over.
+	 */
+	qpair->phase = 1;
+
+	memset(qpair->cmd, 0,
+	    qpair->num_entries * sizeof(struct nvme_command));
+	memset(qpair->cpl, 0,
+	    qpair->num_entries * sizeof(struct nvme_completion));
+}
+
+void
+nvme_admin_qpair_enable(struct nvme_qpair *qpair)
+{
+
+	nvme_qpair_enable(qpair);
+}
+
+void
+nvme_io_qpair_enable(struct nvme_qpair *qpair)
+{
+	STAILQ_HEAD(, nvme_request)	temp;
+	struct nvme_tracker		*tr;
+	struct nvme_request		*req;
+
+	mtx_lock(&qpair->lock);
+
+	nvme_qpair_enable(qpair);
+
+	TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq)
+		nvme_qpair_submit_tracker(qpair, tr);
+
+	STAILQ_INIT(&temp);
+	STAILQ_SWAP(&qpair->queued_req, &temp, nvme_request);
+
+	while (!STAILQ_EMPTY(&temp)) {
+		req = STAILQ_FIRST(&temp);
+		STAILQ_REMOVE_HEAD(&temp, stailq);
+		_nvme_qpair_submit_request(qpair, req);
+	}
+
+	mtx_unlock(&qpair->lock);
+}
+
+static void
+nvme_qpair_disable(struct nvme_qpair *qpair)
+{
+	struct nvme_tracker	*tr;
+
+	qpair->is_enabled = FALSE;
+	mtx_lock(&qpair->lock);
+	TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq)
+		callout_stop(&tr->timer);
+	mtx_unlock(&qpair->lock);
+}
+
+void
+nvme_admin_qpair_disable(struct nvme_qpair *qpair)
+{
+
+	nvme_qpair_disable(qpair);
+	nvme_admin_qpair_abort_aers(qpair);
+}
+
+void
+nvme_io_qpair_disable(struct nvme_qpair *qpair)
+{
+
+	nvme_qpair_disable(qpair);
+}
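Finally, the phase-bit state that nvme_qpair_enable() re-arms is easiest to see with a small worked example (illustrative only; the completion consumer loop itself is untouched by this patch). With a 4-entry completion queue:

	/*
	 * pass 1: HW posts completions with phase bit p=1; SW consumes entries
	 *          whose p matches qpair->phase (1).
	 * wrap:   cq_head rolls over 3 -> 0 and SW toggles qpair->phase to 0.
	 * pass 2: HW posts with p=0; SW consumes while p == 0, and so on.
	 *
	 * Zeroing qpair->cpl in nvme_qpair_enable() guarantees that entries
	 *  left over from before the reset can never match the expected phase
	 *  of the first post-reset pass.
	 */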