nvme: split non-compliant SGLs into multiple requests

Since nvme_ns_cmd.c now walks the SGL, some of the test code
also needs to be updated to initialize and return correct values
such as ctrlr->flags and sge_length.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I521213695def35d0897aabf57a0638a6c347632e
Author: Jim Harris
Date:   2016-12-16 09:41:18 -07:00
Commit: 86e8a920bf (parent: 38c09e5eed)
5 changed files with 211 additions and 28 deletions
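
For context: on controllers that do not support NVMe SGLs, the PCIe driver describes each payload with PRP entries, which require every scattered element except the first to start on a page boundary and every element except the last to end on one. The commit below splits SGL payloads that violate that rule into multiple child requests, each of which is individually PRP-compliant. The standalone sketch of the compliance rule below is illustrative only: it is not part of the commit, assumes 4 KiB pages, and uses a hypothetical iov_is_prp_compliant helper.

#include <stdbool.h>
#include <stdint.h>
#include <sys/uio.h>

#define PRP_PAGE_SIZE 4096u

/*
 * Hypothetical helper (not part of this commit): returns true if the iovec
 * array could be described by a single PRP list, i.e. every element except
 * the first starts on a page boundary and every element except the last
 * ends on one. Payloads that fail this check are what the new splitting
 * code breaks into child requests.
 */
static bool
iov_is_prp_compliant(const struct iovec *iov, int iovcnt)
{
	for (int i = 0; i < iovcnt; i++) {
		uintptr_t start = (uintptr_t)iov[i].iov_base;
		uintptr_t end = start + iov[i].iov_len;

		if (i != 0 && (start & (PRP_PAGE_SIZE - 1)) != 0) {
			return false;
		}
		if (i != iovcnt - 1 && (end & (PRP_PAGE_SIZE - 1)) != 0) {
			return false;
		}
	}
	return true;
}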

@ -133,7 +133,7 @@ struct __attribute__((packed)) nvme_payload {
/**
* Functions for retrieving physical addresses for scattered payloads.
*/
struct {
struct nvme_sgl_args {
spdk_nvme_req_reset_sgl_cb reset_sgl_fn;
spdk_nvme_req_next_sge_cb next_sge_fn;
void *cb_arg;
@ -603,4 +603,10 @@ void nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr);
void nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr);
int nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr);
static inline bool
_is_page_aligned(uint64_t address)
{
return (address & (PAGE_SIZE - 1)) == 0;
}
#endif /* __NVME_INTERNAL_H__ */
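
A quick standalone check of the new helper's mask test, assuming PAGE_SIZE is 0x1000 as elsewhere in the driver (illustrative only; the helper body is copied here so the snippet compiles on its own):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define PAGE_SIZE 0x1000

static inline bool
_is_page_aligned(uint64_t address)
{
	return (address & (PAGE_SIZE - 1)) == 0;
}

int main(void)
{
	assert(_is_page_aligned(0x10000000));          /* on a page boundary */
	assert(!_is_page_aligned(0x10000400));         /* 1 KiB into a page */
	assert(!_is_page_aligned(0x10000000 + 0x800)); /* 2 KiB into a page */
	return 0;
}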

@ -37,7 +37,7 @@ static struct nvme_request *_nvme_ns_cmd_rw(struct spdk_nvme_ns *ns,
const struct nvme_payload *payload, uint32_t payload_offset, uint32_t md_offset,
uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn,
void *cb_arg, uint32_t opc, uint32_t io_flags,
uint16_t apptag_mask, uint16_t apptag);
uint16_t apptag_mask, uint16_t apptag, bool check_sgl);
static void
nvme_cb_complete_child(void *child_arg, const struct spdk_nvme_cpl *cpl)
@ -114,12 +114,12 @@ _nvme_add_child_request(struct spdk_nvme_ns *ns, const struct nvme_payload *payl
uint32_t payload_offset, uint32_t md_offset,
uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc,
uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag,
struct nvme_request *parent)
struct nvme_request *parent, bool check_sgl)
{
struct nvme_request *child;
child = _nvme_ns_cmd_rw(ns, payload, payload_offset, md_offset, lba, lba_count, cb_fn,
cb_arg, opc, io_flags, apptag_mask, apptag);
cb_arg, opc, io_flags, apptag_mask, apptag, check_sgl);
if (child == NULL) {
nvme_request_free_children(parent);
nvme_free_request(parent);
@ -157,7 +157,7 @@ _nvme_ns_cmd_split_request(struct spdk_nvme_ns *ns,
child = _nvme_add_child_request(ns, payload, payload_offset, md_offset,
lba, lba_count, cb_fn, cb_arg, opc,
io_flags, apptag_mask, apptag, req);
io_flags, apptag_mask, apptag, req, true);
if (child == NULL) {
return NULL;
}
@ -200,11 +200,117 @@ _nvme_ns_cmd_setup_request(struct spdk_nvme_ns *ns, struct nvme_request *req,
cmd->cdw15 = (cmd->cdw15 << 16 | apptag);
}
static struct nvme_request *
_nvme_ns_cmd_split_sgl_request(struct spdk_nvme_ns *ns,
const struct nvme_payload *payload,
uint32_t payload_offset, uint32_t md_offset,
uint64_t lba, uint32_t lba_count,
spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc,
uint32_t io_flags, struct nvme_request *req,
uint16_t apptag_mask, uint16_t apptag)
{
struct nvme_sgl_args *args;
bool start_valid, end_valid, last_sge, child_equals_parent;
uint64_t child_lba = lba;
uint32_t req_current_length = 0;
uint32_t child_length = 0;
uint32_t sge_length;
uintptr_t address;
args = &req->payload.u.sgl;
args->reset_sgl_fn(args->cb_arg, payload_offset);
args->next_sge_fn(args->cb_arg, (void **)&address, &sge_length);
while (req_current_length < req->payload_size) {
if (sge_length == 0) {
continue;
} else if (req_current_length + sge_length > req->payload_size) {
sge_length = req->payload_size - req_current_length;
}
/*
* The start of the SGE is invalid if the start address is not page aligned,
* unless it is the first SGE in the child request.
*/
start_valid = child_length == 0 || _is_page_aligned(address);
/* Boolean for whether this is the last SGE in the parent request. */
last_sge = (req_current_length + sge_length == req->payload_size);
/*
* The end of the SGE is invalid if the end address is not page aligned,
* unless it is the last SGE in the parent request.
*/
end_valid = last_sge || _is_page_aligned(address + sge_length);
/*
* This child request equals the parent request, meaning that no splitting
* was required for the parent request (the one passed into this function).
* In this case, we do not create a child request at all - we just send
* the original request as a single request at the end of this function.
*/
child_equals_parent = (child_length + sge_length == req->payload_size);
if (start_valid) {
/*
* The start of the SGE is valid, so advance the length parameters,
* to include this SGE with previous SGEs for this child request
* (if any). If it is not valid, we do not advance the length
* parameters nor get the next SGE, because we must send what has
* been collected before this SGE as a child request.
*/
child_length += sge_length;
req_current_length += sge_length;
if (req_current_length < req->payload_size) {
args->next_sge_fn(args->cb_arg, (void **)&address, &sge_length);
}
}
/*
* If one of these criteria is met, send what we have accumulated so far as a
* child request.
*/
if (!start_valid || !end_valid || !child_equals_parent) {
struct nvme_request *child;
uint32_t child_lba_count;
if ((child_length % ns->sector_size) != 0) {
return NULL;
}
child_lba_count = child_length / ns->sector_size;
/*
* Note the last parameter is set to "false" - this tells the recursive
* call to _nvme_ns_cmd_rw() to not bother with checking for SGL splitting
* since we have already verified it here.
*/
child = _nvme_add_child_request(ns, payload, payload_offset, md_offset,
child_lba, child_lba_count,
cb_fn, cb_arg, opc, io_flags,
apptag_mask, apptag, req, false);
if (child == NULL) {
return NULL;
}
payload_offset += child_length;
md_offset += child_lba_count * ns->md_size;
child_lba += child_lba_count;
child_length = 0;
}
}
if (child_length == req->payload_size) {
/* No splitting was required, so setup the whole payload as one request. */
_nvme_ns_cmd_setup_request(ns, req, opc, lba, lba_count, io_flags, apptag_mask, apptag);
}
return req;
}
static struct nvme_request *
_nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, const struct nvme_payload *payload,
uint32_t payload_offset, uint32_t md_offset,
uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc,
uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag)
uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag, bool check_sgl)
{
struct nvme_request *req;
uint32_t sector_size;
@ -250,6 +356,10 @@ _nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, const struct nvme_payload *payload,
return _nvme_ns_cmd_split_request(ns, payload, payload_offset, md_offset, lba, lba_count, cb_fn,
cb_arg, opc,
io_flags, req, sectors_per_max_io, 0, apptag_mask, apptag);
} else if (req->payload.type == NVME_PAYLOAD_TYPE_SGL && check_sgl &&
!(ns->ctrlr->flags & SPDK_NVME_CTRLR_SGL_SUPPORTED)) {
return _nvme_ns_cmd_split_sgl_request(ns, payload, payload_offset, md_offset, lba, lba_count,
cb_fn, cb_arg, opc, io_flags, req, apptag_mask, apptag);
}
_nvme_ns_cmd_setup_request(ns, req, opc, lba, lba_count, io_flags, apptag_mask, apptag);
@ -271,7 +381,7 @@ spdk_nvme_ns_cmd_read(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, vo
req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_READ,
io_flags, 0,
0);
0, true);
if (req != NULL) {
return nvme_qpair_submit_request(qpair, req);
} else {
@ -295,7 +405,7 @@ spdk_nvme_ns_cmd_read_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *q
req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_READ,
io_flags,
apptag_mask, apptag);
apptag_mask, apptag, true);
if (req != NULL) {
return nvme_qpair_submit_request(qpair, req);
} else {
@ -323,7 +433,7 @@ spdk_nvme_ns_cmd_readv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
payload.u.sgl.cb_arg = cb_arg;
req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_READ,
io_flags, 0, 0);
io_flags, 0, 0, true);
if (req != NULL) {
return nvme_qpair_submit_request(qpair, req);
} else {
@ -345,7 +455,7 @@ spdk_nvme_ns_cmd_write(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
payload.md = NULL;
req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_WRITE,
io_flags, 0, 0);
io_flags, 0, 0, true);
if (req != NULL) {
return nvme_qpair_submit_request(qpair, req);
} else {
@ -367,7 +477,7 @@ spdk_nvme_ns_cmd_write_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *
payload.md = metadata;
req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_WRITE,
io_flags, apptag_mask, apptag);
io_flags, apptag_mask, apptag, true);
if (req != NULL) {
return nvme_qpair_submit_request(qpair, req);
} else {
@ -395,7 +505,7 @@ spdk_nvme_ns_cmd_writev(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
payload.u.sgl.cb_arg = cb_arg;
req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_WRITE,
io_flags, 0, 0);
io_flags, 0, 0, true);
if (req != NULL) {
return nvme_qpair_submit_request(qpair, req);
} else {
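
To make the splitting decision concrete, the standalone trace below (illustrative only, not driver code) mirrors the simplified start_valid / end_valid / child_equals_parent logic above for a payload shaped like the one build_io_request_8 constructs later in this commit: two 1 KiB elements, each starting 0x400 into its own page. The first element's end and the second element's start are both unaligned, so the payload ends up as two 1 KiB child requests.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 0x1000u

struct sge { uintptr_t addr; uint32_t len; };

int main(void)
{
	/* Two 1 KiB SGEs, each starting 0x400 into its own page. */
	struct sge sges[] = {
		{ 0x10000400, 0x400 },
		{ 0x20000400, 0x400 },
	};
	uint32_t payload_size = 0x800;
	uint32_t current = 0, child_len = 0, i = 0;

	while (current < payload_size) {
		struct sge *s = &sges[i];
		bool start_valid = (child_len == 0) || ((s->addr & (PAGE_SIZE - 1)) == 0);
		bool last_sge = (current + s->len == payload_size);
		bool end_valid = last_sge || (((s->addr + s->len) & (PAGE_SIZE - 1)) == 0);
		bool child_equals_parent = (child_len + s->len == payload_size);

		if (start_valid) {
			/* Attach this SGE to the child being built and move on. */
			child_len += s->len;
			current += s->len;
			i++;
		}
		if (!start_valid || !end_valid || !child_equals_parent) {
			/* Flush what has been accumulated as one child request. */
			printf("child request: %u bytes\n", child_len);
			child_len = 0;
		}
	}
	return 0;
}

In the real path each flushed child must also be a whole number of sectors (child_length % ns->sector_size == 0), which these 1024-byte children satisfy for 512-byte sectors.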

@ -1666,12 +1666,15 @@ nvme_pcie_qpair_build_prps_sgl_request(struct spdk_nvme_qpair *qpair, struct nvm
return -1;
}
/* Confirm that this sge is prp compatible. */
if (phys_addr & 0x3 ||
(length < remaining_transfer_len && ((phys_addr + length) & (PAGE_SIZE - 1)))) {
nvme_pcie_fail_request_bad_vtophys(qpair, tr);
return -1;
}
/*
* Any incompatible sges should have been handled up in the splitting routine,
* but assert here as an additional check.
*/
assert((phys_addr & 0x3) == 0); /* Address must be dword aligned. */
/* All SGEs except the last must end on a page boundary. */
assert((length >= remaining_transfer_len) || _is_page_aligned(phys_addr + length));
/* All SGEs except the first must start on a page boundary. */
assert((sge_count == 0) || _is_page_aligned(phys_addr));
data_transferred = nvme_min(remaining_transfer_len, length);
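
Once every SGE obeys the alignment rules these asserts enforce, the whole payload can be covered by one flat run of page addresses, which is what a PRP list holds: the first entry keeps its in-page offset (it becomes PRP1) and every later entry is a page start. A simplified sketch of that expansion (illustrative only, not the driver code; it treats the addresses as already-translated physical addresses and assumes 4 KiB pages):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 0x1000u

struct sge { uint64_t phys_addr; uint32_t len; };

static void
print_prp_entries(const struct sge *sges, int nseg)
{
	int entry = 0;

	for (int i = 0; i < nseg; i++) {
		uint64_t addr = sges[i].phys_addr;
		uint64_t end = addr + sges[i].len;

		while (addr < end) {
			/* First entry keeps its offset; later ones land on page starts. */
			printf("PRP entry %d: 0x%llx\n", entry++, (unsigned long long)addr);
			addr = (addr & ~(uint64_t)(PAGE_SIZE - 1)) + PAGE_SIZE;
		}
	}
}

int main(void)
{
	/* An SGE starting 0x200 into a page and ending on a page boundary,
	 * followed by a page-aligned two-page SGE: three PRP entries total. */
	struct sge sges[] = { { 0x100000200ULL, 0xE00 }, { 0x200000000ULL, 0x2000 } };

	print_prp_entries(sges, 2);
	return 0;
}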

@ -232,6 +232,48 @@ static void build_io_request_7(struct io_request *req)
req->iovs[0].len = 0x10000;
}
static void build_io_request_8(struct io_request *req)
{
req->nseg = 2;
/*
* 1KB for 1st sge; make sure the iov neither starts nor ends
* on a 0x1000 boundary
*/
req->iovs[0].base = spdk_zmalloc(0x1000, 0x1000, NULL);
req->iovs[0].offset = 0x400;
req->iovs[0].len = 0x400;
/*
* 1KB for 2nd sge; make sure the iov neither starts nor ends
* on a 0x1000 boundary
*/
req->iovs[1].base = spdk_zmalloc(0x1000, 0x1000, NULL);
req->iovs[1].offset = 0x400;
req->iovs[1].len = 0x400;
}
static void build_io_request_9(struct io_request *req)
{
/*
* Check that mixed PRP-compliant and non-compliant requests are handled
* properly by splitting them into subrequests.
* Construct buffers with the following layout:
*/
const size_t req_len[] = { 2048, 4096, 2048, 4096, 2048, 1024 };
const size_t req_off[] = { 0x800, 0x0, 0x0, 0x100, 0x800, 0x800 };
struct sgl_element *iovs = req->iovs;
uint32_t i;
req->nseg = sizeof(req_len) / sizeof(req_len[0]);
assert(sizeof(req_len) / sizeof(req_len[0]) == sizeof(req_off) / sizeof(req_off[0]));
for (i = 0; i < req->nseg; i++) {
iovs[i].base = spdk_zmalloc(req_off[i] + req_len[i], 0x4000, NULL);
iovs[i].offset = req_off[i];
iovs[i].len = req_len[i];
}
}
typedef void (*nvme_build_io_req_fn_t)(struct io_request *req);
static void
@ -442,7 +484,9 @@ int main(int argc, char **argv)
|| TEST(build_io_request_4)
|| TEST(build_io_request_5)
|| TEST(build_io_request_6)
|| TEST(build_io_request_7)) {
|| TEST(build_io_request_7)
|| TEST(build_io_request_8)
|| TEST(build_io_request_9)) {
#undef TEST
rc = 1;
printf("%s: failed sgl tests\n", iter->name);
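
For reference, the callbacks handed to spdk_nvme_ns_cmd_readv()/writev() for payloads like these generally look something like the sketch below (an assumed implementation for illustration, not the test's verbatim code). The new splitting path in nvme_ns_cmd.c drives exactly this reset_sgl/next_sge pair when it walks the payload.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct sgl_element {
	void *base;
	uint64_t offset;
	size_t len;
};

struct sgl_walker {
	struct sgl_element *iovs;
	uint32_t nseg;
	uint32_t idx;	/* current element */
	uint64_t skip;	/* bytes already consumed inside the current element */
};

/* Rewind the walker to sgl_offset bytes into the payload. */
static void
walker_reset_sgl(void *cb_arg, uint32_t sgl_offset)
{
	struct sgl_walker *w = cb_arg;

	w->idx = 0;
	while (w->idx < w->nseg && sgl_offset >= w->iovs[w->idx].len) {
		sgl_offset -= w->iovs[w->idx].len;
		w->idx++;
	}
	w->skip = sgl_offset;
}

/* Hand back the current element's address and remaining length, then advance. */
static int
walker_next_sge(void *cb_arg, void **address, uint32_t *length)
{
	struct sgl_walker *w = cb_arg;
	struct sgl_element *e = &w->iovs[w->idx];

	*address = (uint8_t *)e->base + e->offset + w->skip;
	*length = (uint32_t)(e->len - w->skip);
	w->skip = 0;
	w->idx++;
	return 0;
}

int main(void)
{
	/* Two 1 KiB elements, each 0x400 into its own buffer, as in request 8. */
	struct sgl_element iovs[2] = {
		{ .base = (void *)0x10000000, .offset = 0x400, .len = 0x400 },
		{ .base = (void *)0x20000000, .offset = 0x400, .len = 0x400 },
	};
	struct sgl_walker w = { .iovs = iovs, .nseg = 2 };
	void *addr;
	uint32_t len;

	walker_reset_sgl(&w, 0);
	while (w.idx < w.nseg) {
		walker_next_sge(&w, &addr, &len);
		printf("sge: %p, %u bytes\n", addr, len);
	}
	return 0;
}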

@ -58,6 +58,16 @@ static void nvme_request_reset_sgl(void *cb_arg, uint32_t sgl_offset)
static int nvme_request_next_sge(void *cb_arg, void **address, uint32_t *length)
{
uint32_t *lba_count = cb_arg;
/*
* We need to set address to something here, since the SGL splitting code will
* use it to determine PRP compatibility. Just use a rather arbitrary address
* for now - these tests will not actually cause data to be read from or written
* to this address.
*/
*address = (void *)(uintptr_t)0x10000000;
*length = *lba_count;
return 0;
}
@ -187,6 +197,11 @@ prepare_for_test(struct spdk_nvme_ns *ns, struct spdk_nvme_ctrlr *ctrlr,
uint32_t stripe_size)
{
ctrlr->max_xfer_size = max_xfer_size;
/*
* Clear the flags field - we especially want to make sure the SGL_SUPPORTED flag is not set
* so that we test the SGL splitting path.
*/
ctrlr->flags = 0;
memset(ns, 0, sizeof(*ns));
ns->ctrlr = ctrlr;
ns->sector_size = sector_size;
@ -583,11 +598,14 @@ test_nvme_ns_cmd_readv(void)
struct spdk_nvme_qpair qpair;
int rc = 0;
void *cb_arg;
uint32_t lba_count = 256;
uint32_t sector_size = 512;
uint64_t sge_length = lba_count * sector_size;
cb_arg = malloc(512);
prepare_for_test(&ns, &ctrlr, &qpair, 512, 128 * 1024, 0);
rc = spdk_nvme_ns_cmd_readv(&ns, &qpair, 0x1000, 256, NULL, cb_arg, 0, nvme_request_reset_sgl,
nvme_request_next_sge);
prepare_for_test(&ns, &ctrlr, &qpair, sector_size, 128 * 1024, 0);
rc = spdk_nvme_ns_cmd_readv(&ns, &qpair, 0x1000, lba_count, NULL, &sge_length, 0,
nvme_request_reset_sgl, nvme_request_next_sge);
SPDK_CU_ASSERT_FATAL(rc == 0);
SPDK_CU_ASSERT_FATAL(g_request != NULL);
@ -595,7 +613,7 @@ test_nvme_ns_cmd_readv(void)
CU_ASSERT(g_request->payload.type == NVME_PAYLOAD_TYPE_SGL);
CU_ASSERT(g_request->payload.u.sgl.reset_sgl_fn == nvme_request_reset_sgl);
CU_ASSERT(g_request->payload.u.sgl.next_sge_fn == nvme_request_next_sge);
CU_ASSERT(g_request->payload.u.sgl.cb_arg == cb_arg);
CU_ASSERT(g_request->payload.u.sgl.cb_arg == &sge_length);
CU_ASSERT(g_request->cmd.nsid == ns.id);
rc = spdk_nvme_ns_cmd_readv(&ns, &qpair, 0x1000, 256, NULL, cb_arg, 0, nvme_request_reset_sgl,
@ -614,12 +632,14 @@ test_nvme_ns_cmd_writev(void)
struct spdk_nvme_qpair qpair;
int rc = 0;
void *cb_arg;
uint32_t lba_count = 256;
uint32_t sector_size = 512;
uint64_t sge_length = lba_count * sector_size;
cb_arg = malloc(512);
prepare_for_test(&ns, &ctrlr, &qpair, 512, 128 * 1024, 0);
rc = spdk_nvme_ns_cmd_writev(&ns, &qpair, 0x1000, 256, NULL, cb_arg, 0,
nvme_request_reset_sgl,
nvme_request_next_sge);
prepare_for_test(&ns, &ctrlr, &qpair, sector_size, 128 * 1024, 0);
rc = spdk_nvme_ns_cmd_writev(&ns, &qpair, 0x1000, lba_count, NULL, &sge_length, 0,
nvme_request_reset_sgl, nvme_request_next_sge);
SPDK_CU_ASSERT_FATAL(rc == 0);
SPDK_CU_ASSERT_FATAL(g_request != NULL);
@ -627,7 +647,7 @@ test_nvme_ns_cmd_writev(void)
CU_ASSERT(g_request->payload.type == NVME_PAYLOAD_TYPE_SGL);
CU_ASSERT(g_request->payload.u.sgl.reset_sgl_fn == nvme_request_reset_sgl);
CU_ASSERT(g_request->payload.u.sgl.next_sge_fn == nvme_request_next_sge);
CU_ASSERT(g_request->payload.u.sgl.cb_arg == cb_arg);
CU_ASSERT(g_request->payload.u.sgl.cb_arg == &sge_length);
CU_ASSERT(g_request->cmd.nsid == ns.id);
rc = spdk_nvme_ns_cmd_writev(&ns, &qpair, 0x1000, 256, NULL, cb_arg, 0,