nvme/pcie: only set qpair state from qpair's thread

The qpair's state member is only 3 bits of a uint8_t,
and the in_completion_context bit is another bit in that
same uint8_t.

We know that the qpair's state is only ever updated by
one thread, but it is possible that the state could
be modified by one thread, while another thread
is modifying in_completion_context.

in_completion_context is only modified by the thread
that is polling the qpair (or the qpair's poll group).
But with async mode, another thread that has a qpair
on the same PCIe controller could poll its adminq and
reap the SQ completion for the qpair that's owned by
the other thread.

So do *not* set the generic qpair state to CONNECTED
from the SQ completion callback.  Instead just set
the pcie_state to READY, and let the thread that owns
the qpair detect the qpair is READY and set the state
to CONNECTED itself.

Fixes issue #2157.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I9efc0c954504f1841e1c3890ae78211ad0d1990e
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9975
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
This commit is contained in:
Jim Harris 2021-10-21 16:26:05 +00:00
parent 955548a4ae
commit e40bd53175
2 changed files with 25 additions and 11 deletions

View File

@ -456,7 +456,6 @@ nvme_completion_sq_error_delete_cq_cb(void *arg, const struct spdk_nvme_cpl *cpl
}
pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED;
nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
}
static void
@ -486,12 +485,10 @@ nvme_completion_create_sq_cb(void *arg, const struct spdk_nvme_cpl *cpl)
if (rc != 0) {
SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc);
pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED;
nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
}
return;
}
pqpair->pcie_state = NVME_PCIE_QPAIR_READY;
nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
if (ctrlr->shadow_doorbell) {
pqpair->shadow_doorbell.sq_tdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 0) *
pctrlr->doorbell_stride_u32;
@ -529,7 +526,6 @@ nvme_completion_create_cq_cb(void *arg, const struct spdk_nvme_cpl *cpl)
if (spdk_nvme_cpl_is_error(cpl)) {
pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED;
nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
SPDK_ERRLOG("nvme_create_io_cq failed!\n");
return;
}
@ -543,7 +539,6 @@ nvme_completion_create_cq_cb(void *arg, const struct spdk_nvme_cpl *cpl)
if (rc != 0) {
SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc);
pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED;
nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
}
return;
}
@ -861,12 +856,27 @@ nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_
return -ENXIO;
}
if (spdk_unlikely(pqpair->pcie_state != NVME_PCIE_QPAIR_READY)) {
rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
if (rc < 0) {
return rc;
if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING)) {
if (pqpair->pcie_state == NVME_PCIE_QPAIR_READY) {
/* It is possible that another thread set the pcie_state to
* QPAIR_READY, if it polled the adminq and processed the SQ
* completion for this qpair. So check for that condition
* here and then update the qpair's state to CONNECTED, since
* we can only set the qpair state from the qpair's thread.
* (Note: this fixed issue #2157.)
*/
nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
} else if (pqpair->pcie_state == NVME_PCIE_QPAIR_FAILED) {
nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
return -ENXIO;
} else {
rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
if (rc < 0) {
return rc;
} else if (pqpair->pcie_state == NVME_PCIE_QPAIR_FAILED) {
nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
return -ENXIO;
}
}
return 0;
}

View File

@ -367,10 +367,12 @@ test_nvme_pcie_ctrlr_connect_qpair(void)
CU_ASSERT(req[1].cmd.cdw11_bits.create_io_sq.cqid = 1);
CU_ASSERT(req[1].cmd.dptr.prp.prp1 == 0xDDADBEEF);
pqpair.qpair.state = NVME_QPAIR_CONNECTING;
/* Complete the second request */
req[1].cb_fn(req[1].cb_arg, &cpl);
CU_ASSERT(pqpair.pcie_state == NVME_PCIE_QPAIR_READY);
CU_ASSERT(pqpair.qpair.state == NVME_QPAIR_CONNECTED);
/* State is still CONNECTING until the thread is polled again. */
CU_ASSERT(pqpair.qpair.state == NVME_QPAIR_CONNECTING);
/* doorbell stride and qid are 1 */
CU_ASSERT(pqpair.shadow_doorbell.sq_tdbl == pctrlr.ctrlr.shadow_doorbell + 2);
@ -419,10 +421,12 @@ test_nvme_pcie_ctrlr_connect_qpair(void)
CU_ASSERT(req[1].cmd.cdw11_bits.create_io_sq.cqid = 1);
CU_ASSERT(req[1].cmd.dptr.prp.prp1 == 0xDDADBEEF);
pqpair.qpair.state = NVME_QPAIR_CONNECTING;
/* Complete the second request */
req[1].cb_fn(req[1].cb_arg, &cpl);
CU_ASSERT(pqpair.pcie_state == NVME_PCIE_QPAIR_READY);
CU_ASSERT(pqpair.qpair.state == NVME_QPAIR_CONNECTED);
/* State is still CONNECTING until the thread is polled again. */
CU_ASSERT(pqpair.qpair.state == NVME_QPAIR_CONNECTING);
CU_ASSERT(pqpair.shadow_doorbell.sq_tdbl == NULL);
CU_ASSERT(pqpair.shadow_doorbell.sq_eventidx == NULL);