nvme/pcie: only set qpair state from qpair's thread
The qpair's state member is only 3 bits of a uint8_t, and the
in_completion_context bit is another bit in that same uint8_t.

We know that the qpair's state is only ever updated by one thread, but it
is possible that the state could be modified by one thread, while another
thread is modifying in_completion_context.

in_completion_context is only modified by the thread that is polling the
qpair (or the qpair's poll group). But with async mode, another thread
that has a qpair on the same PCIe controller could poll its adminq and
reap the SQ completion for the qpair that's owned by the other thread.

So do *not* set the generic qpair state to CONNECTED from the SQ
completion callback. Instead just set the pcie_state to READY, and let
the thread that owns the qpair detect the qpair is READY and set the
state to CONNECTED itself.

Fixes issue #2157.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I9efc0c954504f1841e1c3890ae78211ad0d1990e
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9975
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
parent 955548a4ae
commit e40bd53175
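The race the message describes is easiest to see in miniature. Below is a minimal sketch, assuming a field layout like the one described above; the struct name and thread labels are illustrative only, not SPDK's actual definitions (the real bit-fields live in struct spdk_nvme_qpair):

#include <stdint.h>

/* Two bit-fields packed into the same uint8_t: the compiler can only
 * update either one with a read-modify-write of the whole byte. */
struct qpair_flags {
	uint8_t state : 3;                 /* meant to be written only by the qpair's owner thread */
	uint8_t in_completion_context : 1; /* written by whichever thread is polling the qpair */
};

/* Thread A: flags.state = NVME_QPAIR_CONNECTED;   load byte, merge, store byte
 * Thread B: flags.in_completion_context = 1;      load byte, merge, store byte
 * If the two read-modify-write sequences interleave, one store silently
 * discards the other thread's update. */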
--- a/lib/nvme/nvme_pcie_common.c
+++ b/lib/nvme/nvme_pcie_common.c
@@ -456,7 +456,6 @@ nvme_completion_sq_error_delete_cq_cb(void *arg, const struct spdk_nvme_cpl *cpl
 	}
 
 	pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED;
-	nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
 }
 
 static void
@@ -486,12 +485,10 @@ nvme_completion_create_sq_cb(void *arg, const struct spdk_nvme_cpl *cpl)
 		if (rc != 0) {
 			SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc);
 			pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED;
-			nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
 		}
 		return;
 	}
 	pqpair->pcie_state = NVME_PCIE_QPAIR_READY;
-	nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
 	if (ctrlr->shadow_doorbell) {
 		pqpair->shadow_doorbell.sq_tdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 0) *
 						  pctrlr->doorbell_stride_u32;
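For orientation, the transport-level pcie_state values used in these hunks form a small state machine that now advances independently of the generic qpair state. This is a sketch reconstructed from the names in the diff; the WAIT_FOR_* members, ordering, and values are assumptions (the SPDK tree's internal headers hold the authoritative enum):

/* Sketch only: READY/FAILED are taken from this diff, the WAIT_FOR_*
 * states are implied by the create-CQ/create-SQ callback sequence above. */
enum nvme_pcie_qpair_state {
	NVME_PCIE_QPAIR_WAIT_FOR_CQ = 1,	/* create-IO-CQ command outstanding */
	NVME_PCIE_QPAIR_WAIT_FOR_SQ,		/* create-IO-SQ command outstanding */
	NVME_PCIE_QPAIR_READY,			/* SQ completion reaped; qpair usable */
	NVME_PCIE_QPAIR_FAILED,			/* a create/delete step failed */
};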
@@ -529,7 +526,6 @@ nvme_completion_create_cq_cb(void *arg, const struct spdk_nvme_cpl *cpl)
 
 	if (spdk_nvme_cpl_is_error(cpl)) {
 		pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED;
-		nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
 		SPDK_ERRLOG("nvme_create_io_cq failed!\n");
 		return;
 	}
@@ -543,7 +539,6 @@ nvme_completion_create_cq_cb(void *arg, const struct spdk_nvme_cpl *cpl)
 		if (rc != 0) {
 			SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc);
 			pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED;
-			nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
 		}
 		return;
 	}
@@ -861,12 +856,27 @@ nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_
 		return -ENXIO;
 	}
 
-	if (spdk_unlikely(pqpair->pcie_state != NVME_PCIE_QPAIR_READY)) {
-		rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
-		if (rc < 0) {
-			return rc;
+	if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING)) {
+		if (pqpair->pcie_state == NVME_PCIE_QPAIR_READY) {
+			/* It is possible that another thread set the pcie_state to
+			 * QPAIR_READY, if it polled the adminq and processed the SQ
+			 * completion for this qpair. So check for that condition
+			 * here and then update the qpair's state to CONNECTED, since
+			 * we can only set the qpair state from the qpair's thread.
+			 * (Note: this fixed issue #2157.)
+			 */
+			nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
+		} else if (pqpair->pcie_state == NVME_PCIE_QPAIR_FAILED) {
+			nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
+			return -ENXIO;
+		} else {
+			rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
+			if (rc < 0) {
+				return rc;
+			} else if (pqpair->pcie_state == NVME_PCIE_QPAIR_FAILED) {
+				nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
+				return -ENXIO;
+			}
 		}
 		return 0;
 	}
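The new branch runs only when the qpair's owner thread polls it, so the CONNECTING-to-CONNECTED promotion is deferred until that poll. Here is a hedged sketch of how the owner thread might drive an async connect to completion; wait_for_connected() itself is illustrative, while the state helpers and the process_completions call are the ones appearing in the hunk above:

/* Illustrative only: poll until the deferred promotion lands or the
 * connect fails. spdk_nvme_qpair_process_completions() dispatches to
 * nvme_pcie_qpair_process_completions() (above) for PCIe I/O qpairs. */
static int
wait_for_connected(struct spdk_nvme_qpair *qpair)
{
	int rc;

	while (nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING) {
		/* Each poll executes the CONNECTING branch added above:
		 * READY is promoted to CONNECTED on the owner thread, and
		 * FAILED surfaces here as -ENXIO. */
		rc = spdk_nvme_qpair_process_completions(qpair, 0);
		if (rc < 0) {
			return rc;
		}
	}

	return 0;
}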
--- a/test/unit/lib/nvme/nvme_pcie_common.c/nvme_pcie_common_ut.c
+++ b/test/unit/lib/nvme/nvme_pcie_common.c/nvme_pcie_common_ut.c
@@ -367,10 +367,12 @@ test_nvme_pcie_ctrlr_connect_qpair(void)
 	CU_ASSERT(req[1].cmd.cdw11_bits.create_io_sq.cqid = 1);
 	CU_ASSERT(req[1].cmd.dptr.prp.prp1 == 0xDDADBEEF);
 
+	pqpair.qpair.state = NVME_QPAIR_CONNECTING;
 	/* Complete the second request */
 	req[1].cb_fn(req[1].cb_arg, &cpl);
 	CU_ASSERT(pqpair.pcie_state == NVME_PCIE_QPAIR_READY);
-	CU_ASSERT(pqpair.qpair.state == NVME_QPAIR_CONNECTED);
+	/* State is still CONNECTING until the thread is polled again. */
+	CU_ASSERT(pqpair.qpair.state == NVME_QPAIR_CONNECTING);
 
 	/* doorbell stride and qid are 1 */
 	CU_ASSERT(pqpair.shadow_doorbell.sq_tdbl == pctrlr.ctrlr.shadow_doorbell + 2);
@@ -419,10 +421,12 @@ test_nvme_pcie_ctrlr_connect_qpair(void)
 	CU_ASSERT(req[1].cmd.cdw11_bits.create_io_sq.cqid = 1);
 	CU_ASSERT(req[1].cmd.dptr.prp.prp1 == 0xDDADBEEF);
 
+	pqpair.qpair.state = NVME_QPAIR_CONNECTING;
 	/* Complete the second request */
 	req[1].cb_fn(req[1].cb_arg, &cpl);
 	CU_ASSERT(pqpair.pcie_state == NVME_PCIE_QPAIR_READY);
-	CU_ASSERT(pqpair.qpair.state == NVME_QPAIR_CONNECTED);
+	/* State is still CONNECTING until the thread is polled again. */
+	CU_ASSERT(pqpair.qpair.state == NVME_QPAIR_CONNECTING);
 
 	CU_ASSERT(pqpair.shadow_doorbell.sq_tdbl == NULL);
 	CU_ASSERT(pqpair.shadow_doorbell.sq_eventidx == NULL);
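A hypothetical continuation of this test (not part of the commit) would poll the qpair once from its owner thread and check that the deferred transition completes:

	/* Hypothetical follow-up, reusing the fixture above: polling from the
	 * owner thread runs the CONNECTING branch and finishes the promotion
	 * that the assertions above show as still pending. */
	nvme_pcie_qpair_process_completions(&pqpair.qpair, 0);
	CU_ASSERT(pqpair.qpair.state == NVME_QPAIR_CONNECTED);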