nvme: Fix submission queue overflow

SPDK can submit more commands to a remote NVMf target than allowed by
the negotiated queue size. SPDK submits up to SQSIZE commands, but only
SQSIZE-1 are allowed.

Here is a relevant quote from NVMe over Fabrics rev.1.1a ch.2.4.1
“Submission Queue Flow Control Negotiation”:

If SQ flow control is disabled, then the host should limit the number
of outstanding commands for a queue pair to be less than the size of
the Submission Queue. If the controller detects that the number of
outstanding commands for a queue pair is greater than or equal to the
size of the Submission Queue, then the controller shall:

a) stop processing commands and set the Controller Fatal
Status (CSTS.CFS) bit to ‘1’ (refer to section 10.5 in the NVMe Base
specification); and

b) terminate the NVMe Transport connection and end the association
between the host and the controller.

Signed-off-by: Evgeniy Kochetov <evgeniik@nvidia.com>
Change-Id: Ifbcf5d51911fc4ddcea1f7cde3135571648606f3
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11413
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
This commit is contained in:
Evgeniy Kochetov 2022-02-04 14:52:01 +02:00 committed by Jim Harris
parent 486426529d
commit 834e3c5a0e
5 changed files with 84 additions and 16 deletions

View File

@ -55,10 +55,12 @@ extern "C" {
#define SPDK_NVME_MAX_IO_QUEUES (65535)
#define SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES 2
#define SPDK_NVME_QUEUE_MIN_ENTRIES (2)
#define SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES SPDK_NVME_QUEUE_MIN_ENTRIES
#define SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES 4096
#define SPDK_NVME_IO_QUEUE_MIN_ENTRIES 2
#define SPDK_NVME_IO_QUEUE_MIN_ENTRIES SPDK_NVME_QUEUE_MIN_ENTRIES
#define SPDK_NVME_IO_QUEUE_MAX_ENTRIES 65536
/**

View File

@ -508,7 +508,7 @@ nvme_rdma_qpair_process_cm_event(struct nvme_rdma_qpair *rqpair)
rc = -1;
} else {
SPDK_DEBUGLOG(nvme, "Requested queue depth %d. Target receive queue depth %d.\n",
rqpair->num_entries, accept_data->crqsize);
rqpair->num_entries + 1, accept_data->crqsize);
}
break;
case RDMA_CM_EVENT_DISCONNECTED:
@ -1151,8 +1151,8 @@ nvme_rdma_connect(struct nvme_rdma_qpair *rqpair)
assert(rctrlr != NULL);
request_data.qid = rqpair->qpair.id;
request_data.hrqsize = rqpair->num_entries;
request_data.hsqsize = rqpair->num_entries - 1;
request_data.hrqsize = rqpair->num_entries + 1;
request_data.hsqsize = rqpair->num_entries;
request_data.cntlid = ctrlr->cntlid;
param.private_data = &request_data;
@ -1311,7 +1311,7 @@ _nvme_rdma_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_q
return -1;
}
rc = nvme_fabric_qpair_connect(&rqpair->qpair, rqpair->num_entries);
rc = nvme_fabric_qpair_connect(&rqpair->qpair, rqpair->num_entries + 1);
if (rc < 0) {
rqpair->qpair.transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN;
SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n");
@ -1750,13 +1750,23 @@ nvme_rdma_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,
struct spdk_nvme_qpair *qpair;
int rc;
if (qsize < SPDK_NVME_QUEUE_MIN_ENTRIES) {
SPDK_ERRLOG("Failed to create qpair with size %u. Minimum queue size is %d.\n",
qsize, SPDK_NVME_QUEUE_MIN_ENTRIES);
return NULL;
}
rqpair = nvme_rdma_calloc(1, sizeof(struct nvme_rdma_qpair));
if (!rqpair) {
SPDK_ERRLOG("failed to get create rqpair\n");
return NULL;
}
rqpair->num_entries = qsize;
/* Set num_entries one less than queue size. According to NVMe
* and NVMe-oF specs we can not submit queue size requests,
* one slot shall always remain empty.
*/
rqpair->num_entries = qsize - 1;
rqpair->delay_cmd_submit = delay_cmd_submit;
qpair = &rqpair->qpair;
rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests, false);

View File

@ -1964,7 +1964,7 @@ nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvm
rc = -EAGAIN;
break;
case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND:
rc = nvme_fabric_qpair_connect_async(&tqpair->qpair, tqpair->num_entries);
rc = nvme_fabric_qpair_connect_async(&tqpair->qpair, tqpair->num_entries + 1);
if (rc < 0) {
SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n");
break;
@ -2052,13 +2052,23 @@ nvme_tcp_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,
struct spdk_nvme_qpair *qpair;
int rc;
if (qsize < SPDK_NVME_QUEUE_MIN_ENTRIES) {
SPDK_ERRLOG("Failed to create qpair with size %u. Minimum queue size is %d.\n",
qsize, SPDK_NVME_QUEUE_MIN_ENTRIES);
return NULL;
}
tqpair = calloc(1, sizeof(struct nvme_tcp_qpair));
if (!tqpair) {
SPDK_ERRLOG("failed to get create tqpair\n");
return NULL;
}
tqpair->num_entries = qsize;
/* Set num_entries one less than queue size. According to NVMe
* and NVMe-oF specs we can not submit queue size requests,
* one slot shall always remain empty.
*/
tqpair->num_entries = qsize - 1;
qpair = &tqpair->qpair;
rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests, async);
if (rc != 0) {

View File

@ -569,7 +569,7 @@ test_nvme_rdma_ctrlr_create_qpair(void)
CU_ASSERT(qpair != NULL);
rqpair = SPDK_CONTAINEROF(qpair, struct nvme_rdma_qpair, qpair);
CU_ASSERT(qpair == &rqpair->qpair);
CU_ASSERT(rqpair->num_entries == qsize);
CU_ASSERT(rqpair->num_entries == qsize - 1);
CU_ASSERT(rqpair->delay_cmd_submit == false);
CU_ASSERT(rqpair->rsp_sgls != NULL);
CU_ASSERT(rqpair->rsp_recv_wrs != NULL);
@ -580,13 +580,37 @@ test_nvme_rdma_ctrlr_create_qpair(void)
nvme_rdma_free(rqpair);
rqpair = NULL;
/* Test case 2: queue qsize zero. ExpectL FAIL */
/* Test case 2: queue size 2. Expect: PASS */
qsize = 2;
qpair = nvme_rdma_ctrlr_create_qpair(&ctrlr, qid, qsize,
SPDK_NVME_QPRIO_URGENT, 1,
false);
CU_ASSERT(qpair != NULL);
rqpair = SPDK_CONTAINEROF(qpair, struct nvme_rdma_qpair, qpair);
CU_ASSERT(rqpair->num_entries == qsize - 1);
CU_ASSERT(rqpair->rsp_sgls != NULL);
CU_ASSERT(rqpair->rsp_recv_wrs != NULL);
CU_ASSERT(rqpair->rsps != NULL);
nvme_rdma_free_reqs(rqpair);
nvme_rdma_free_rsps(rqpair);
nvme_rdma_free(rqpair);
rqpair = NULL;
/* Test case 3: queue size zero. Expect: FAIL */
qsize = 0;
qpair = nvme_rdma_ctrlr_create_qpair(&ctrlr, qid, qsize,
SPDK_NVME_QPRIO_URGENT, 1,
false);
SPDK_CU_ASSERT_FATAL(qpair == NULL);
/* Test case 4: queue size 1. Expect: FAIL */
qsize = 1;
qpair = nvme_rdma_ctrlr_create_qpair(&ctrlr, qid, qsize,
SPDK_NVME_QPRIO_URGENT, 1,
false);
SPDK_CU_ASSERT_FATAL(qpair == NULL);
}
DEFINE_STUB(ibv_create_cq, struct ibv_cq *, (struct ibv_context *context, int cqe, void *cq_context,
@ -764,7 +788,7 @@ test_nvme_rdma_ctrlr_construct(void)
SPDK_CU_ASSERT_FATAL(ctrlr->adminq != NULL);
rqpair = SPDK_CONTAINEROF(ctrlr->adminq, struct nvme_rdma_qpair, qpair);
CU_ASSERT(rqpair->num_entries == opts.admin_queue_size);
CU_ASSERT(rqpair->num_entries == opts.admin_queue_size - 1);
CU_ASSERT(rqpair->delay_cmd_submit == false);
CU_ASSERT(rqpair->rsp_sgls != NULL);
CU_ASSERT(rqpair->rsp_recv_wrs != NULL);

View File

@ -3,7 +3,7 @@
*
* Copyright (c) Intel Corporation.
* All rights reserved.
* Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021,2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -1493,8 +1493,8 @@ test_nvme_tcp_ctrlr_create_io_qpair(void)
struct spdk_nvme_qpair *qpair = NULL;
struct spdk_nvme_ctrlr ctrlr = {};
uint16_t qid = 1;
const struct spdk_nvme_io_qpair_opts opts = {
.io_queue_size = 1,
struct spdk_nvme_io_qpair_opts opts = {
.io_queue_size = 2,
.qprio = SPDK_NVME_QPRIO_URGENT,
.io_queue_requests = 1,
};
@ -1516,11 +1516,33 @@ test_nvme_tcp_ctrlr_create_io_qpair(void)
CU_ASSERT(qpair->qprio == SPDK_NVME_QPRIO_URGENT);
CU_ASSERT(qpair->trtype == SPDK_NVME_TRANSPORT_TCP);
CU_ASSERT(qpair->poll_group == (void *)0xDEADBEEF);
CU_ASSERT(tqpair->num_entries = 1);
CU_ASSERT(tqpair->num_entries == 1);
free(tqpair->tcp_reqs);
spdk_free(tqpair->send_pdus);
free(tqpair);
/* Max queue size shall pass */
opts.io_queue_size = 0xffff;
qpair = nvme_tcp_ctrlr_create_io_qpair(&ctrlr, qid, &opts);
tqpair = nvme_tcp_qpair(qpair);
CU_ASSERT(qpair != NULL);
CU_ASSERT(tqpair->num_entries == 0xfffe);
free(tqpair->tcp_reqs);
spdk_free(tqpair->send_pdus);
free(tqpair);
/* Queue size 0 shall fail */
opts.io_queue_size = 0;
qpair = nvme_tcp_ctrlr_create_io_qpair(&ctrlr, qid, &opts);
CU_ASSERT(qpair == NULL);
/* Queue size 1 shall fail */
opts.io_queue_size = 1;
qpair = nvme_tcp_ctrlr_create_io_qpair(&ctrlr, qid, &opts);
CU_ASSERT(qpair == NULL);
}
static void