Add ability to queue nvme_request objects if no nvme_trackers are available.

This eliminates the need to manage queue depth at the nvd(4) level for
Chatham prototype board workarounds, and also adds the ability to
accept a number of requests on a single qpair that is much larger
than the number of trackers allocated.

Sponsored by:	Intel
Jim Harris, 2012-10-18 00:45:53 +00:00
parent 21b6da584b, commit 0f71ecf741
4 changed files with 34 additions and 32 deletions
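In short: when nvme_qpair_submit_request() cannot take a tracker from free_tr, the request is appended to a new per-qpair STAILQ (queued_req) instead of sleeping, and every completion that returns a tracker to free_tr immediately resubmits the oldest queued request. The standalone sketch below illustrates that pattern with the same BSD <sys/queue.h> macros; the toy_* names are illustrative only and are not the driver's types.

/*
 * Illustration of the queue-when-no-tracker pattern (toy_* names are
 * not the driver's types).  One tracker, three requests: requests 1
 * and 2 wait on the STAILQ and are replayed, oldest first, as each
 * completion returns the tracker to the free list.
 */
#include <sys/queue.h>
#include <stdio.h>

struct toy_request {
	int				id;
	STAILQ_ENTRY(toy_request)	stailq;	/* like nvme_request.stailq */
};

struct toy_tracker {
	struct toy_request		*req;
	SLIST_ENTRY(toy_tracker)	slist;	/* like nvme_tracker.slist */
};

struct toy_qpair {
	SLIST_HEAD(, toy_tracker)	free_tr;
	STAILQ_HEAD(, toy_request)	queued_req;
};

static struct toy_tracker *
toy_submit(struct toy_qpair *q, struct toy_request *req)
{
	struct toy_tracker *tr = SLIST_FIRST(&q->free_tr);

	if (tr == NULL) {
		/* No tracker free: defer instead of sleeping or failing. */
		STAILQ_INSERT_TAIL(&q->queued_req, req, stailq);
		printf("req %d queued\n", req->id);
		return (NULL);
	}
	SLIST_REMOVE_HEAD(&q->free_tr, slist);
	tr->req = req;
	printf("req %d submitted\n", req->id);
	return (tr);
}

static struct toy_tracker *
toy_complete(struct toy_qpair *q, struct toy_tracker *tr)
{
	struct toy_request *req;

	printf("req %d completed\n", tr->req->id);
	SLIST_INSERT_HEAD(&q->free_tr, tr, slist);

	/* A tracker just freed up: replay the oldest queued request. */
	if (STAILQ_EMPTY(&q->queued_req))
		return (NULL);
	req = STAILQ_FIRST(&q->queued_req);
	STAILQ_REMOVE_HEAD(&q->queued_req, stailq);
	return (toy_submit(q, req));
}

int
main(void)
{
	struct toy_qpair q;
	struct toy_tracker only_tr;
	struct toy_request r[3] = { { .id = 0 }, { .id = 1 }, { .id = 2 } };
	struct toy_tracker *inflight;

	SLIST_INIT(&q.free_tr);
	STAILQ_INIT(&q.queued_req);
	SLIST_INSERT_HEAD(&q.free_tr, &only_tr, slist);

	inflight = toy_submit(&q, &r[0]);
	toy_submit(&q, &r[1]);
	toy_submit(&q, &r[2]);

	while (inflight != NULL)
		inflight = toy_complete(&q, inflight);
	return (0);
}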

sys/dev/nvd/nvd.c

@@ -162,8 +162,7 @@ nvd_done(void *arg, const struct nvme_completion *status)
 	ndisk = bp->bio_disk->d_drv1;
-	if (atomic_fetchadd_int(&ndisk->cur_depth, -1) == NVME_QD)
-		taskqueue_enqueue(ndisk->tq, &ndisk->bioqtask);
+	atomic_add_int(&ndisk->cur_depth, -1);
 	/*
 	 * TODO: add more extensive translation of NVMe status codes
@@ -187,9 +186,6 @@ nvd_bioq_process(void *arg, int pending)
 	int err;
 	for (;;) {
-		if (atomic_load_acq_int(&ndisk->cur_depth) >= NVME_QD)
-			break;
 		mtx_lock(&ndisk->bioqlock);
 		bp = bioq_takefirst(&ndisk->bioq);
 		mtx_unlock(&ndisk->bioqlock);
@@ -210,24 +206,12 @@ nvd_bioq_process(void *arg, int pending)
 #endif
 		bp->bio_driver1 = NULL;
-		atomic_add_acq_int(&ndisk->cur_depth, 1);
+		atomic_add_int(&ndisk->cur_depth, 1);
 		err = nvme_ns_bio_process(ndisk->ns, bp, nvd_done);
-		/*
-		 * TODO: remove this loop and rely on GEOM's pacing once
-		 * nvme(4) returns ENOMEM only for malloc() failures.
-		 * Currently nvme(4) returns ENOMEM also for cases when
-		 * the submission queue is completely full, and that case
-		 * will be handled more elegantly in a future update.
-		 */
-		while (err == ENOMEM) {
-			pause("nvd enomem", 1);
-			err = nvme_ns_bio_process(ndisk->ns, bp, nvd_done);
-		}
 		if (err) {
-			atomic_add_acq_int(&ndisk->cur_depth, -1);
+			atomic_add_int(&ndisk->cur_depth, -1);
 			bp->bio_error = err;
 			bp->bio_flags |= BIO_ERROR;
 			bp->bio_resid = bp->bio_bcount;

sys/dev/nvme/nvme.h

@@ -689,9 +689,6 @@ enum nvme_io_test_flags {
 struct bio;
-/* TODO: reassess this QD variable - its a workaround for Chatham2 issue */
-#define NVME_QD (200)
 struct nvme_namespace;
 struct nvme_consumer;

sys/dev/nvme/nvme_private.h

@@ -115,7 +115,7 @@ struct nvme_request {
 	struct uio *uio;
 	nvme_cb_fn_t cb_fn;
 	void *cb_arg;
-	SLIST_ENTRY(nvme_request) slist;
+	STAILQ_ENTRY(nvme_request) stailq;
 };
 struct nvme_tracker {
@@ -168,6 +168,7 @@ struct nvme_qpair {
 	uint64_t cpl_bus_addr;
 	SLIST_HEAD(, nvme_tracker) free_tr;
+	STAILQ_HEAD(, nvme_request) queued_req;
 	struct nvme_tracker **act_tr;
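A note on the structure change above: the linkage for a deferred request is embedded in struct nvme_request itself (STAILQ_ENTRY), so queueing a request allocates nothing at a moment when resources are already scarce, and a singly linked tail queue gives O(1) insert-at-tail plus remove-at-head, i.e. FIFO replay order. The free tracker list stays an SLIST because it only ever pushes and pops at the head. A small sketch of that usage pattern, with hypothetical item/itemq names:

#include <sys/queue.h>

struct item {
	int			v;
	STAILQ_ENTRY(item)	link;	/* embedded: nothing to allocate at enqueue time */
};
STAILQ_HEAD(itemq, item);

/* FIFO: producers append at the tail ... */
void
itemq_defer(struct itemq *q, struct item *it)
{
	STAILQ_INSERT_TAIL(q, it, link);
}

/* ... and the consumer replays from the head, oldest first. */
struct item *
itemq_next(struct itemq *q)
{
	struct item *it = STAILQ_FIRST(q);

	if (it != NULL)
		STAILQ_REMOVE_HEAD(q, link);
	return (it);
}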

sys/dev/nvme/nvme_qpair.c

@@ -30,6 +30,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/bus.h>
+#include <dev/pci/pcivar.h>
 #include "nvme_private.h"
 static boolean_t
@@ -142,10 +144,13 @@ nvme_qpair_process_completions(struct nvme_qpair *qpair)
 		nvme_free_request(req);
-		if (SLIST_EMPTY(&qpair->free_tr))
-			wakeup(qpair);
 		SLIST_INSERT_HEAD(&qpair->free_tr, tr, slist);
+		if (!STAILQ_EMPTY(&qpair->queued_req)) {
+			req = STAILQ_FIRST(&qpair->queued_req);
+			STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
+			nvme_qpair_submit_request(qpair, req);
+		}
 	}
 	mtx_unlock(&qpair->lock);
@@ -179,6 +184,16 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
 	qpair->id = id;
 	qpair->vector = vector;
 	qpair->num_entries = num_entries;
+#ifdef CHATHAM2
+	/*
+	 * Chatham prototype board starts having issues at higher queue
+	 * depths. So use a conservative estimate here of no more than 64
+	 * outstanding I/O per queue at any one point.
+	 */
+	if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
+		num_trackers = min(num_trackers, 64);
+#endif
+	qpair->num_trackers = num_trackers;
 	qpair->max_xfer_size = max_xfer_size;
 	qpair->ctrlr = ctrlr;
@@ -217,7 +232,6 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
 	qpair->num_cmds = 0;
 	qpair->num_intr_handler_calls = 0;
-	qpair->num_trackers = num_trackers;
 	qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;
 	/* TODO: error checking on contigmalloc, bus_dmamap_load calls */
@@ -242,8 +256,9 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
 	qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl);
 	SLIST_INIT(&qpair->free_tr);
+	STAILQ_INIT(&qpair->queued_req);
-	for (i = 0; i < num_trackers; i++) {
+	for (i = 0; i < qpair->num_trackers; i++) {
 		tr = malloc(sizeof(*tr), M_NVME, M_ZERO | M_NOWAIT);
 		if (tr == NULL) {
@@ -400,10 +415,14 @@ nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
 	tr = SLIST_FIRST(&qpair->free_tr);
-	while (tr == NULL) {
-		msleep(qpair, &qpair->lock, PRIBIO, "qpair_tr", 0);
-		printf("msleep\n");
-		tr = SLIST_FIRST(&qpair->free_tr);
+	if (tr == NULL) {
+		/*
+		 * No tracker is available. Put the request on the qpair's
+		 * request queue to be processed when a tracker frees up
+		 * via a command completion.
+		 */
+		STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
+		goto ret;
 	}
 	SLIST_REMOVE_HEAD(&qpair->free_tr, slist);
@@ -427,5 +446,6 @@ nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
 		panic("bus_dmamap_load returned non-zero!\n");
 	}
+ret:
 	mtx_unlock(&qpair->lock);
 }
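Taken together, the last two hunks give the submit path the shape sketched below. This is a simplified, driver-context sketch that assumes the structures declared in nvme_private.h; command construction and DMA mapping are elided, and submit_sketch is a hypothetical name, not the actual function body. The point is that the new ret: label lets the early queue-and-return path share the single mtx_unlock() with the normal path.

#include "nvme_private.h"

static void
submit_sketch(struct nvme_qpair *qpair, struct nvme_request *req)
{
	struct nvme_tracker *tr;

	mtx_lock(&qpair->lock);
	tr = SLIST_FIRST(&qpair->free_tr);
	if (tr == NULL) {
		/* Defer; a later completion will replay this request. */
		STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
		goto ret;
	}
	SLIST_REMOVE_HEAD(&qpair->free_tr, slist);
	/* ... attach req to tr, build the command, map buffers, ring the doorbell ... */
ret:
	mtx_unlock(&qpair->lock);
}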