lib/ftl: Limit write queue depth to one per chunk

There are no ordering guarantees for completions on an NVMe IO submission queue,
so wait for the write completion on a specific chunk before submitting another
write to it. To control chunk occupancy, split the IO into child requests and
release the chunk in the IO completion callback.

Change-Id: I44147a21b528a7f33fb92b9e77d7de8f5b18f8ff
Signed-off-by: Wojciech Malikowski <wojciech.malikowski@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/449239
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
Reviewed-by: Young Tack Jin <youngtack.jin@circuitblvd.com>
Reviewed-by: Claire Jihyun In <claire.in@circuitblvd.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
commit 58c4dac9d9
parent 049624823b
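In short, the patch gates writes with a per-chunk busy flag: ftl_wptr_advance() marks the chunk busy, each write is carved into a child request sized to one transfer, and the child's completion callback clears the flag; a write that finds the chunk busy is parked in wptr->current_io and retried from ftl_wptr_process_writes(). The following is a minimal standalone sketch of that gating idea only, not SPDK code; struct chunk, struct write_req, submit_write and write_done are invented names for illustration.

/* Sketch: one in-flight write per chunk, released from the completion callback. */
#include <stdbool.h>
#include <stdio.h>

struct chunk {
    bool busy;              /* set while a write to this chunk is in flight */
};

struct write_req {
    struct chunk *chunk;
    int lbk_cnt;
};

/* Completion callback: release the chunk so the next write may be submitted. */
static void write_done(struct write_req *req)
{
    req->chunk->busy = false;
}

/* Returns 0 on submission, -1 ("try again later") when the chunk is occupied. */
static int submit_write(struct write_req *req)
{
    if (req->chunk->busy) {
        return -1;          /* caller keeps the request and retries later */
    }
    req->chunk->busy = true;
    printf("submitted %d blocks\n", req->lbk_cnt);
    return 0;
}

int main(void)
{
    struct chunk c = { .busy = false };
    struct write_req a = { .chunk = &c, .lbk_cnt = 64 };
    struct write_req b = { .chunk = &c, .lbk_cnt = 64 };

    submit_write(&a);                /* accepted, chunk becomes busy */
    if (submit_write(&b) != 0) {     /* deferred, chunk still busy */
        printf("chunk busy, deferring second write\n");
    }
    write_done(&a);                  /* completion releases the chunk */
    submit_write(&b);                /* now accepted */
    return 0;
}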
lib/ftl/ftl_band.c

@@ -765,6 +765,7 @@ ftl_band_write_md(struct ftl_band *band, void *data, size_t lbk_cnt,
 {
     struct spdk_ftl_dev *dev = band->dev;
     struct ftl_io *io;
+    int rc;
 
     io = ftl_io_init_md_write(dev, band, data,
                               spdk_divide_round_up(lbk_cnt, dev->xfer_size), cb);
@@ -774,7 +775,12 @@ ftl_band_write_md(struct ftl_band *band, void *data, size_t lbk_cnt,
 
     md_fn(dev, &band->md, data);
 
-    return ftl_io_write(io);
+    rc = ftl_io_write(io);
+    if (rc == -EAGAIN) {
+        rc = 0;
+    }
+
+    return rc;
 }
 
 void
lib/ftl/ftl_band.h

@@ -55,6 +55,9 @@ struct ftl_chunk {
     /* Block state */
     enum ftl_chunk_state state;
 
+    /* Indicates that there is inflight write */
+    bool busy;
+
     /* First PPA */
     struct ftl_ppa start_ppa;
 
@@ -250,7 +253,9 @@ ftl_band_chunk_is_first(struct ftl_band *band, struct ftl_chunk *chunk)
 static inline int
 ftl_chunk_is_writable(const struct ftl_chunk *chunk)
 {
-    return chunk->state == FTL_CHUNK_STATE_OPEN || chunk->state == FTL_CHUNK_STATE_FREE;
+    return (chunk->state == FTL_CHUNK_STATE_OPEN ||
+            chunk->state == FTL_CHUNK_STATE_FREE) &&
+           !chunk->busy;
 }
 
 #endif /* FTL_BAND_H */
lib/ftl/ftl_core.c

@@ -66,6 +66,9 @@ struct ftl_wptr {
     /* Current erase block */
     struct ftl_chunk *chunk;
 
+    /* IO that is currently processed */
+    struct ftl_io *current_io;
+
     /* List link */
     LIST_ENTRY(ftl_wptr) list_entry;
 };
@@ -438,6 +441,7 @@ ftl_wptr_advance(struct ftl_wptr *wptr, size_t xfer_size)
         ftl_band_set_state(band, FTL_BAND_STATE_FULL);
     }
 
+    wptr->chunk->busy = true;
     wptr->ppa = ftl_band_next_xfer_ppa(band, wptr->ppa, xfer_size);
     wptr->chunk = ftl_band_next_operational_chunk(band, wptr->chunk);
 
@@ -740,8 +744,8 @@ ftl_submit_read(struct ftl_io *io, ftl_next_ppa_fn next_ppa,
             break;
         }
 
-        ftl_io_advance(io, lbk_cnt);
         ftl_io_inc_req(io);
+        ftl_io_advance(io, lbk_cnt);
     }
 
     /* If we didn't have to read anything from the device, */
@@ -978,37 +982,123 @@ ftl_update_l2p(struct spdk_ftl_dev *dev, const struct ftl_rwb_entry *entry,
     pthread_spin_unlock(&band->md.lock);
 }
 
+static struct ftl_io *
+ftl_io_init_child_write(struct ftl_io *parent, struct ftl_ppa ppa,
+                        void *data, void *md, spdk_ftl_fn cb)
+{
+    struct ftl_io *io;
+    struct spdk_ftl_dev *dev = parent->dev;
+    struct ftl_io_init_opts opts = {
+        .dev = dev,
+        .io = NULL,
+        .parent = parent,
+        .rwb_batch = NULL,
+        .band = parent->band,
+        .size = sizeof(struct ftl_io),
+        .flags = 0,
+        .type = FTL_IO_WRITE,
+        .iov_cnt = 1,
+        .req_size = dev->xfer_size,
+        .fn = cb,
+        .data = data,
+        .md = md,
+    };
+
+    io = ftl_io_init_internal(&opts);
+    if (!io) {
+        return NULL;
+    }
+
+    io->ppa = ppa;
+
+    return io;
+}
+
+static void
+ftl_io_child_write_cb(void *ctx, int status)
+{
+    struct ftl_chunk *chunk;
+    struct ftl_io *io = ctx;
+
+    chunk = ftl_band_chunk_from_ppa(io->band, io->ppa);
+    chunk->busy = false;
+}
+
+static int
+ftl_submit_child_write(struct ftl_wptr *wptr, struct ftl_io *io, int lbk_cnt)
+{
+    struct spdk_ftl_dev *dev = io->dev;
+    struct ftl_io *child;
+    struct iovec *iov = ftl_io_iovec(io);
+    int rc;
+
+    /* Split IO to child requests and release chunk immediately after child is completed */
+    child = ftl_io_init_child_write(io, wptr->ppa, iov[io->iov_pos].iov_base,
+                                    ftl_io_get_md(io), ftl_io_child_write_cb);
+    if (!child) {
+        return -EAGAIN;
+    }
+
+    rc = spdk_nvme_ns_cmd_write_with_md(dev->ns, ftl_get_write_qpair(dev),
+                                        child->iov.iov_base, child->md,
+                                        ftl_ppa_addr_pack(dev, wptr->ppa),
+                                        lbk_cnt, ftl_io_cmpl_cb, child, 0, 0, 0);
+    if (rc) {
+        ftl_io_fail(child, rc);
+        ftl_io_complete(child);
+        SPDK_ERRLOG("spdk_nvme_ns_cmd_write failed with status:%d, ppa:%lu\n",
+                    rc, wptr->ppa.ppa);
+
+        return -EIO;
+    }
+
+    ftl_io_inc_req(child);
+    ftl_io_advance(child, lbk_cnt);
+
+    return 0;
+}
+
 static int
 ftl_submit_write(struct ftl_wptr *wptr, struct ftl_io *io)
 {
     struct spdk_ftl_dev *dev = io->dev;
     struct iovec *iov = ftl_io_iovec(io);
     int rc = 0;
-    size_t i, lbk_cnt;
+    size_t lbk_cnt;
 
-    for (i = 0; i < io->iov_cnt; ++i) {
-        lbk_cnt = iov[i].iov_len / PAGE_SIZE;
-        assert(iov[i].iov_len > 0);
+    while (io->iov_pos < io->iov_cnt) {
+        lbk_cnt = iov[io->iov_pos].iov_len / PAGE_SIZE;
+        assert(iov[io->iov_pos].iov_len > 0);
         assert(lbk_cnt == dev->xfer_size);
 
-        ftl_trace_submission(dev, io, wptr->ppa, iov[i].iov_len / PAGE_SIZE);
-        rc = spdk_nvme_ns_cmd_write_with_md(dev->ns, ftl_get_write_qpair(dev),
-                                            iov[i].iov_base, ftl_io_get_md(io),
-                                            ftl_ppa_addr_pack(dev, wptr->ppa),
-                                            lbk_cnt, ftl_io_cmpl_cb, io, 0, 0, 0);
-        if (rc) {
-            ftl_io_fail(io, rc);
-            SPDK_ERRLOG("spdk_nvme_ns_cmd_write failed with status:%d, ppa:%lu\n",
-                        rc, wptr->ppa.ppa);
+        /* There are no guarantees of the order of completion of NVMe IO submission queue */
+        /* so wait until chunk is not busy before submitting another write */
+        if (wptr->chunk->busy) {
+            wptr->current_io = io;
+            rc = -EAGAIN;
             break;
         }
 
-        ftl_io_inc_req(io);
+        rc = ftl_submit_child_write(wptr, io, lbk_cnt);
+
+        if (rc == -EAGAIN) {
+            wptr->current_io = io;
+            break;
+        } else if (rc) {
+            ftl_io_fail(io, rc);
+            break;
+        }
+
+        ftl_trace_submission(dev, io, wptr->ppa, lbk_cnt);
+
+        /* Update parent iovec */
+        ftl_io_advance(io, lbk_cnt);
 
         ftl_wptr_advance(wptr, lbk_cnt);
     }
 
     if (ftl_io_done(io)) {
+        /* Parent IO will complete after all children are completed */
         ftl_io_complete(io);
     }
@@ -1045,6 +1135,13 @@ ftl_wptr_process_writes(struct ftl_wptr *wptr)
     struct ftl_io *io;
     struct ftl_ppa ppa, prev_ppa;
 
+    if (wptr->current_io) {
+        if (ftl_submit_write(wptr, wptr->current_io) == -EAGAIN) {
+            return 0;
+        }
+        wptr->current_io = NULL;
+    }
+
     /* Make sure the band is prepared for writing */
     if (!ftl_wptr_ready(wptr)) {
         return 0;