diff --git a/lib/nvme/nvme_internal.h b/lib/nvme/nvme_internal.h
index 91a674eb77..a499605341 100644
--- a/lib/nvme/nvme_internal.h
+++ b/lib/nvme/nvme_internal.h
@@ -133,7 +133,7 @@ struct __attribute__((packed)) nvme_payload {
 		/**
 		 * Functions for retrieving physical addresses for scattered payloads.
 		 */
-		struct {
+		struct nvme_sgl_args {
 			spdk_nvme_req_reset_sgl_cb reset_sgl_fn;
 			spdk_nvme_req_next_sge_cb next_sge_fn;
 			void *cb_arg;
@@ -603,4 +603,10 @@ void	nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr);
 void	nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr);
 int	nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr);
 
+static inline bool
+_is_page_aligned(uint64_t address)
+{
+	return (address & (PAGE_SIZE - 1)) == 0;
+}
+
 #endif /* __NVME_INTERNAL_H__ */
diff --git a/lib/nvme/nvme_ns_cmd.c b/lib/nvme/nvme_ns_cmd.c
index 6b36ef9174..7b8d4a0d95 100644
--- a/lib/nvme/nvme_ns_cmd.c
+++ b/lib/nvme/nvme_ns_cmd.c
@@ -37,7 +37,7 @@ static struct nvme_request *_nvme_ns_cmd_rw(struct spdk_nvme_ns *ns,
 		const struct nvme_payload *payload, uint32_t payload_offset, uint32_t md_offset,
 		uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn,
 		void *cb_arg, uint32_t opc, uint32_t io_flags,
-		uint16_t apptag_mask, uint16_t apptag);
+		uint16_t apptag_mask, uint16_t apptag, bool check_sgl);
 
 static void
 nvme_cb_complete_child(void *child_arg, const struct spdk_nvme_cpl *cpl)
@@ -114,12 +114,12 @@ _nvme_add_child_request(struct spdk_nvme_ns *ns, const struct nvme_payload *payl
 			uint32_t payload_offset, uint32_t md_offset,
 			uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc,
 			uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag,
-			struct nvme_request *parent)
+			struct nvme_request *parent, bool check_sgl)
 {
 	struct nvme_request	*child;
 
 	child = _nvme_ns_cmd_rw(ns, payload, payload_offset, md_offset, lba, lba_count, cb_fn,
-				cb_arg, opc, io_flags, apptag_mask, apptag);
+				cb_arg, opc, io_flags, apptag_mask, apptag, check_sgl);
 	if (child == NULL) {
 		nvme_request_free_children(parent);
 		nvme_free_request(parent);
@@ -157,7 +157,7 @@ _nvme_ns_cmd_split_request(struct spdk_nvme_ns *ns,
 
 		child = _nvme_add_child_request(ns, payload, payload_offset, md_offset,
 						lba, lba_count, cb_fn, cb_arg, opc,
-						io_flags, apptag_mask, apptag, req);
+						io_flags, apptag_mask, apptag, req, true);
 		if (child == NULL) {
 			return NULL;
 		}
@@ -200,11 +200,117 @@ _nvme_ns_cmd_setup_request(struct spdk_nvme_ns *ns, struct nvme_request *req,
 	cmd->cdw15 = (cmd->cdw15 << 16 | apptag);
 }
 
+static struct nvme_request *
+_nvme_ns_cmd_split_sgl_request(struct spdk_nvme_ns *ns,
+			       const struct nvme_payload *payload,
+			       uint32_t payload_offset, uint32_t md_offset,
+			       uint64_t lba, uint32_t lba_count,
+			       spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc,
+			       uint32_t io_flags, struct nvme_request *req,
+			       uint16_t apptag_mask, uint16_t apptag)
+{
+	struct nvme_sgl_args *args;
+	bool start_valid, end_valid, last_sge, child_equals_parent;
+	uint64_t child_lba = lba;
+	uint32_t req_current_length = 0;
+	uint32_t child_length = 0;
+	uint32_t sge_length;
+	uintptr_t address;
+
+	args = &req->payload.u.sgl;
+
+	args->reset_sgl_fn(args->cb_arg, payload_offset);
+	args->next_sge_fn(args->cb_arg, (void **)&address, &sge_length);
+	while (req_current_length < req->payload_size) {
+
+		if (sge_length == 0) {
+			continue;
+		} else if (req_current_length + sge_length > req->payload_size) {
+			sge_length = req->payload_size - req_current_length;
+		}
+
+		/*
+		 * The start of the SGE is invalid if the start address is not page aligned,
+		 * unless it is the first SGE in the child request.
+		 */
+		start_valid = child_length == 0 || _is_page_aligned(address);
+
+		/* Boolean for whether this is the last SGE in the parent request. */
+		last_sge = (req_current_length + sge_length == req->payload_size);
+
+		/*
+		 * The end of the SGE is invalid if the end address is not page aligned,
+		 * unless it is the last SGE in the parent request.
+		 */
+		end_valid = last_sge || _is_page_aligned(address + sge_length);
+
+		/*
+		 * This child request equals the parent request, meaning that no splitting
+		 * was required for the parent request (the one passed into this function).
+		 * In this case, we do not create a child request at all - we just send
+		 * the original request as a single request at the end of this function.
+		 */
+		child_equals_parent = (child_length + sge_length == req->payload_size);
+
+		if (start_valid) {
+			/*
+			 * The start of the SGE is valid, so advance the length parameters
+			 * to include this SGE with previous SGEs for this child request
+			 * (if any). If it is not valid, we do not advance the length
+			 * parameters nor get the next SGE, because we must send what has
+			 * been collected before this SGE as a child request.
+			 */
+			child_length += sge_length;
+			req_current_length += sge_length;
+			if (req_current_length < req->payload_size) {
+				args->next_sge_fn(args->cb_arg, (void **)&address, &sge_length);
+			}
+		}
+
+		/*
+		 * If one of these criteria is met, send what we have accumulated so far as a
+		 * child request.
+		 */
+		if (!start_valid || !end_valid || !child_equals_parent) {
+			struct nvme_request *child;
+			uint32_t child_lba_count;
+
+			if ((child_length % ns->sector_size) != 0) {
+				return NULL;
+			}
+			child_lba_count = child_length / ns->sector_size;
+			/*
+			 * Note the last parameter is set to "false" - this tells the recursive
+			 * call to _nvme_ns_cmd_rw() not to bother checking for SGL splitting,
+			 * since we have already verified it here.
+			 */
+			child = _nvme_add_child_request(ns, payload, payload_offset, md_offset,
+							child_lba, child_lba_count,
+							cb_fn, cb_arg, opc, io_flags,
+							apptag_mask, apptag, req, false);
+			if (child == NULL) {
+				return NULL;
+			}
+			payload_offset += child_length;
+			md_offset += child_lba_count * ns->md_size;
+			child_lba += child_lba_count;
+			child_length = 0;
+		}
+	}
+
+	if (child_length == req->payload_size) {
+		/* No splitting was required, so setup the whole payload as one request. */
+		_nvme_ns_cmd_setup_request(ns, req, opc, lba, lba_count, io_flags, apptag_mask, apptag);
+	}
+
+	return req;
+}
+
 static struct nvme_request *
 _nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, const struct nvme_payload *payload,
 		uint32_t payload_offset, uint32_t md_offset,
 		uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc,
-		uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag)
+		uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag, bool check_sgl)
 {
 	struct nvme_request	*req;
 	uint32_t		sector_size;
@@ -250,6 +356,10 @@ _nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, const struct nvme_payload *payload,
 		return _nvme_ns_cmd_split_request(ns, payload, payload_offset, md_offset, lba, lba_count,
 						  cb_fn, cb_arg, opc, io_flags, req,
 						  sectors_per_max_io, 0, apptag_mask, apptag);
+	} else if (req->payload.type == NVME_PAYLOAD_TYPE_SGL && check_sgl &&
+		   !(ns->ctrlr->flags & SPDK_NVME_CTRLR_SGL_SUPPORTED)) {
+		return _nvme_ns_cmd_split_sgl_request(ns, payload, payload_offset, md_offset, lba, lba_count,
+						      cb_fn, cb_arg, opc, io_flags, req, apptag_mask, apptag);
 	}
 
 	_nvme_ns_cmd_setup_request(ns, req, opc, lba, lba_count, io_flags, apptag_mask, apptag);
@@ -271,7 +381,7 @@ spdk_nvme_ns_cmd_read(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, vo
 
 	req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_READ,
 			      io_flags, 0,
-			      0);
+			      0, true);
 	if (req != NULL) {
 		return nvme_qpair_submit_request(qpair, req);
 	} else {
@@ -295,7 +405,7 @@ spdk_nvme_ns_cmd_read_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *q
 
 	req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_READ,
 			      io_flags,
-			      apptag_mask, apptag);
+			      apptag_mask, apptag, true);
 	if (req != NULL) {
 		return nvme_qpair_submit_request(qpair, req);
 	} else {
@@ -323,7 +433,7 @@ spdk_nvme_ns_cmd_readv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
 	payload.u.sgl.cb_arg = cb_arg;
 
 	req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_READ,
-			      io_flags, 0, 0);
+			      io_flags, 0, 0, true);
 	if (req != NULL) {
 		return nvme_qpair_submit_request(qpair, req);
 	} else {
@@ -345,7 +455,7 @@ spdk_nvme_ns_cmd_write(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
 	payload.md = NULL;
 
 	req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_WRITE,
-			      io_flags, 0, 0);
+			      io_flags, 0, 0, true);
 	if (req != NULL) {
 		return nvme_qpair_submit_request(qpair, req);
 	} else {
@@ -367,7 +477,7 @@ spdk_nvme_ns_cmd_write_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *
 	payload.md = metadata;
 
 	req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_WRITE,
-			      io_flags, apptag_mask, apptag);
+			      io_flags, apptag_mask, apptag, true);
 	if (req != NULL) {
 		return nvme_qpair_submit_request(qpair, req);
 	} else {
@@ -395,7 +505,7 @@ spdk_nvme_ns_cmd_writev(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
 	payload.u.sgl.cb_arg = cb_arg;
 
 	req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_WRITE,
-			      io_flags, 0, 0);
+			      io_flags, 0, 0, true);
 	if (req != NULL) {
 		return nvme_qpair_submit_request(qpair, req);
 	} else {
diff --git a/lib/nvme/nvme_pcie.c b/lib/nvme/nvme_pcie.c
index 5dae16a0b0..2f0591e9da 100644
--- a/lib/nvme/nvme_pcie.c
+++ b/lib/nvme/nvme_pcie.c
@@ -1666,12 +1666,15 @@ nvme_pcie_qpair_build_prps_sgl_request(struct spdk_nvme_qpair *qpair, struct nvm
 			return -1;
 		}
 
-		/* Confirm that this sge is prp compatible. */
-		if (phys_addr & 0x3 ||
-		    (length < remaining_transfer_len && ((phys_addr + length) & (PAGE_SIZE - 1)))) {
-			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
-			return -1;
-		}
+		/*
+		 * Any incompatible SGEs should have been handled in the splitting
+		 * routine above, but assert here as an additional check.
+		 */
+		assert((phys_addr & 0x3) == 0); /* Address must be dword aligned. */
+		/* All SGEs except the last must end on a page boundary. */
+		assert((length >= remaining_transfer_len) || _is_page_aligned(phys_addr + length));
+		/* All SGEs except the first must start on a page boundary. */
+		assert((sge_count == 0) || _is_page_aligned(phys_addr));
 
 		data_transferred = nvme_min(remaining_transfer_len, length);
 
diff --git a/test/lib/nvme/sgl/sgl.c b/test/lib/nvme/sgl/sgl.c
index 63068c0009..9511e05591 100644
--- a/test/lib/nvme/sgl/sgl.c
+++ b/test/lib/nvme/sgl/sgl.c
@@ -232,6 +232,48 @@ static void build_io_request_7(struct io_request *req)
 	req->iovs[0].len = 0x10000;
 }
 
+static void build_io_request_8(struct io_request *req)
+{
+	req->nseg = 2;
+
+	/*
+	 * 1KB for the 1st sge; make sure the iov address does not start or end
+	 * on a 0x1000 boundary.
+	 */
+	req->iovs[0].base = spdk_zmalloc(0x1000, 0x1000, NULL);
+	req->iovs[0].offset = 0x400;
+	req->iovs[0].len = 0x400;
+
+	/*
+	 * 1KB for the 2nd sge; make sure the iov address does not start or end
+	 * on a 0x1000 boundary.
+	 */
+	req->iovs[1].base = spdk_zmalloc(0x1000, 0x1000, NULL);
+	req->iovs[1].offset = 0x400;
+	req->iovs[1].len = 0x400;
+}
+
+static void build_io_request_9(struct io_request *req)
+{
+	/*
+	 * Check that mixed PRP-compliant and non-compliant segments are handled
+	 * properly by splitting them into subrequests.
+	 * Construct buffers with the following offsets and lengths:
+	 */
+	const size_t req_len[] = { 2048, 4096, 2048, 4096, 2048, 1024 };
+	const size_t req_off[] = { 0x800, 0x0, 0x0, 0x100, 0x800, 0x800 };
+	struct sgl_element *iovs = req->iovs;
+	uint32_t i;
+
+	req->nseg = sizeof(req_len) / sizeof(req_len[0]);
+	assert(sizeof(req_len) / sizeof(req_len[0]) == sizeof(req_off) / sizeof(req_off[0]));
+
+	for (i = 0; i < req->nseg; i++) {
+		iovs[i].base = spdk_zmalloc(req_off[i] + req_len[i], 0x4000, NULL);
+		iovs[i].offset = req_off[i];
+		iovs[i].len = req_len[i];
+	}
+}
+
 typedef void (*nvme_build_io_req_fn_t)(struct io_request *req);
 
 static void
@@ -442,7 +484,9 @@ int main(int argc, char **argv)
 		    || TEST(build_io_request_4)
 		    || TEST(build_io_request_5)
 		    || TEST(build_io_request_6)
-		    || TEST(build_io_request_7)) {
+		    || TEST(build_io_request_7)
+		    || TEST(build_io_request_8)
+		    || TEST(build_io_request_9)) {
 #undef TEST
 			rc = 1;
 			printf("%s: failed sgl tests\n", iter->name);
diff --git a/test/lib/nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c b/test/lib/nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c
index fc7d4fc555..fb850c3138 100644
--- a/test/lib/nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c
+++ b/test/lib/nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c
@@ -58,6 +58,16 @@ static void nvme_request_reset_sgl(void *cb_arg, uint32_t sgl_offset)
 
 static int nvme_request_next_sge(void *cb_arg, void **address, uint32_t *length)
 {
+	uint32_t *lba_count = cb_arg;
+
+	/*
+	 * We need to set address to something here, since the SGL splitting code will
+	 * use it to determine PRP compatibility. Just use a rather arbitrary address
+	 * for now - these tests will not actually cause data to be read from or written
+	 * to this address.
+ */ + *address = (void *)(uintptr_t)0x10000000; + *length = *lba_count; return 0; } @@ -187,6 +197,11 @@ prepare_for_test(struct spdk_nvme_ns *ns, struct spdk_nvme_ctrlr *ctrlr, uint32_t stripe_size) { ctrlr->max_xfer_size = max_xfer_size; + /* + * Clear the flags field - we especially want to make sure the SGL_SUPPORTED flag is not set + * so that we test the SGL splitting path. + */ + ctrlr->flags = 0; memset(ns, 0, sizeof(*ns)); ns->ctrlr = ctrlr; ns->sector_size = sector_size; @@ -583,11 +598,14 @@ test_nvme_ns_cmd_readv(void) struct spdk_nvme_qpair qpair; int rc = 0; void *cb_arg; + uint32_t lba_count = 256; + uint32_t sector_size = 512; + uint64_t sge_length = lba_count * sector_size; cb_arg = malloc(512); - prepare_for_test(&ns, &ctrlr, &qpair, 512, 128 * 1024, 0); - rc = spdk_nvme_ns_cmd_readv(&ns, &qpair, 0x1000, 256, NULL, cb_arg, 0, nvme_request_reset_sgl, - nvme_request_next_sge); + prepare_for_test(&ns, &ctrlr, &qpair, sector_size, 128 * 1024, 0); + rc = spdk_nvme_ns_cmd_readv(&ns, &qpair, 0x1000, lba_count, NULL, &sge_length, 0, + nvme_request_reset_sgl, nvme_request_next_sge); SPDK_CU_ASSERT_FATAL(rc == 0); SPDK_CU_ASSERT_FATAL(g_request != NULL); @@ -595,7 +613,7 @@ test_nvme_ns_cmd_readv(void) CU_ASSERT(g_request->payload.type == NVME_PAYLOAD_TYPE_SGL); CU_ASSERT(g_request->payload.u.sgl.reset_sgl_fn == nvme_request_reset_sgl); CU_ASSERT(g_request->payload.u.sgl.next_sge_fn == nvme_request_next_sge); - CU_ASSERT(g_request->payload.u.sgl.cb_arg == cb_arg); + CU_ASSERT(g_request->payload.u.sgl.cb_arg == &sge_length); CU_ASSERT(g_request->cmd.nsid == ns.id); rc = spdk_nvme_ns_cmd_readv(&ns, &qpair, 0x1000, 256, NULL, cb_arg, 0, nvme_request_reset_sgl, @@ -614,12 +632,14 @@ test_nvme_ns_cmd_writev(void) struct spdk_nvme_qpair qpair; int rc = 0; void *cb_arg; + uint32_t lba_count = 256; + uint32_t sector_size = 512; + uint64_t sge_length = lba_count * sector_size; cb_arg = malloc(512); - prepare_for_test(&ns, &ctrlr, &qpair, 512, 128 * 1024, 0); - rc = spdk_nvme_ns_cmd_writev(&ns, &qpair, 0x1000, 256, NULL, cb_arg, 0, - nvme_request_reset_sgl, - nvme_request_next_sge); + prepare_for_test(&ns, &ctrlr, &qpair, sector_size, 128 * 1024, 0); + rc = spdk_nvme_ns_cmd_writev(&ns, &qpair, 0x1000, lba_count, NULL, &sge_length, 0, + nvme_request_reset_sgl, nvme_request_next_sge); SPDK_CU_ASSERT_FATAL(rc == 0); SPDK_CU_ASSERT_FATAL(g_request != NULL); @@ -627,7 +647,7 @@ test_nvme_ns_cmd_writev(void) CU_ASSERT(g_request->payload.type == NVME_PAYLOAD_TYPE_SGL); CU_ASSERT(g_request->payload.u.sgl.reset_sgl_fn == nvme_request_reset_sgl); CU_ASSERT(g_request->payload.u.sgl.next_sge_fn == nvme_request_next_sge); - CU_ASSERT(g_request->payload.u.sgl.cb_arg == cb_arg); + CU_ASSERT(g_request->payload.u.sgl.cb_arg == &sge_length); CU_ASSERT(g_request->cmd.nsid == ns.id); rc = spdk_nvme_ns_cmd_writev(&ns, &qpair, 0x1000, 256, NULL, cb_arg, 0,