nvmf/vfio-user: implement device quiesce APIs
libvfio-user invokes the quiesce callback when the client requests a memory region add/remove or a device state change. In the quiesce callback we pause the subsystem so that it is safe to process the request, and once the quiesce callback has completed we resume the subsystem. The quiesce callback is also used in live migration: each device state change quiesces the device first.

Change-Id: I3a6a0320ad76c6b2d1d65c754b9f79cce5c9c683
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/10620
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
parent be0aae0658
commit b3cd421ffd
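The core flow the diff below implements: register a quiesce callback with libvfio-user, pause the NVMf subsystem when it fires, report completion with vfu_device_quiesced(), then immediately resume. The following is a minimal sketch of that pattern, not the code from this commit: the struct my_endpoint container and the my_* callback names are hypothetical, and the real implementation additionally tracks controller state and re-pauses when another quiesce was queued during resume.

#include <errno.h>
#include "libvfio-user.h"   /* vfu_ctx_t, vfu_get_private(), vfu_device_quiesced() */
#include "spdk/nvmf.h"      /* spdk_nvmf_subsystem_pause()/resume() */

/* Hypothetical per-endpoint state; the real code keeps this in
 * struct nvmf_vfio_user_endpoint / nvmf_vfio_user_ctrlr. */
struct my_endpoint {
	vfu_ctx_t *vfu_ctx;
	struct spdk_nvmf_subsystem *subsystem;
};

static void
my_resume_done(struct spdk_nvmf_subsystem *subsystem, void *cb_arg, int status)
{
	/* The device is running again. The real code checks here whether
	 * libvfio-user queued another quiesce while we were resuming
	 * (the queued_quiesce flag) and pauses again if so. */
}

static void
my_pause_done(struct spdk_nvmf_subsystem *subsystem, void *cb_arg, int status)
{
	struct my_endpoint *ep = cb_arg;

	/* The subsystem is paused: tell libvfio-user the device is
	 * quiesced, then immediately start resuming the subsystem. */
	vfu_device_quiesced(ep->vfu_ctx, status);
	spdk_nvmf_subsystem_resume(ep->subsystem, my_resume_done, ep);
}

static int
my_quiesce_cb(vfu_ctx_t *vfu_ctx)
{
	struct my_endpoint *ep = vfu_get_private(vfu_ctx);

	if (spdk_nvmf_subsystem_pause(ep->subsystem, 0, my_pause_done, ep) < 0) {
		return 0;	/* pause could not start; treat as already quiesced */
	}

	/* Pausing is asynchronous: returning -1 with errno set to EBUSY
	 * tells libvfio-user to wait for vfu_device_quiesced(). */
	errno = EBUSY;
	return -1;
}

/* Registered once during endpoint setup, before vfu_realize_ctx():
 *     vfu_setup_device_quiesce_cb(vfu_ctx, my_quiesce_cb);
 */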
@@ -214,6 +214,8 @@ struct nvmf_vfio_user_ctrlr {
 	struct spdk_thread *thread;
 	struct spdk_poller *vfu_ctx_poller;
 
+	bool queued_quiesce;
+
 	bool reset_shn;
 
 	uint16_t cntlid;
@@ -1622,7 +1624,7 @@ memory_region_add_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
 	pthread_mutex_unlock(&endpoint->lock);
 }
 
-static int
+static void
 memory_region_remove_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
 {
 	struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx);
@@ -1632,7 +1634,7 @@ memory_region_remove_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
 	int ret = 0;
 
 	if (!info->vaddr) {
-		return 0;
+		return;
 	}
 
 	if (((uintptr_t)info->mapping.iov_base & MASK_2MB) ||
@@ -1640,7 +1642,7 @@ memory_region_remove_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
 		SPDK_DEBUGLOG(nvmf_vfio, "Invalid memory region vaddr %p, IOVA %#lx-%#lx\n", info->vaddr,
 			      (uintptr_t)info->mapping.iov_base,
 			      (uintptr_t)info->mapping.iov_base + info->mapping.iov_len);
-		return 0;
+		return;
 	}
 
 	assert(endpoint != NULL);
@@ -1678,8 +1680,6 @@ memory_region_remove_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
 			    ret);
 		}
 	}
-
-	return 0;
 }
 
 static int
@@ -1943,6 +1943,125 @@ init_pci_config_space(vfu_pci_config_space_t *p)
 	p->hdr.intr.ipin = 0x1;
 }
 
+static void
+vfio_user_dev_quiesce_done(struct spdk_nvmf_subsystem *subsystem,
+			   void *cb_arg, int status);
+
+static void
+vfio_user_dev_quiesce_resume_done(struct spdk_nvmf_subsystem *subsystem,
+				  void *cb_arg, int status)
+{
+	struct nvmf_vfio_user_ctrlr *vu_ctrlr = cb_arg;
+	struct nvmf_vfio_user_endpoint *endpoint = vu_ctrlr->endpoint;
+	int ret;
+
+	SPDK_DEBUGLOG(nvmf_vfio, "%s resumed done with status %d\n", ctrlr_id(vu_ctrlr), status);
+
+	vu_ctrlr->state = VFIO_USER_CTRLR_RUNNING;
+
+	/* Basically, once we call `vfu_device_quiesced` the device is unquiesced from
+	 * libvfio-user's perspective, so from the moment `vfio_user_dev_quiesce_done` returns
+	 * libvfio-user might quiesce the device again. However, because resuming the NVMf
+	 * subsystem is an asynchronous operation, that quiesce might come _before_ the NVMf
+	 * subsystem has been resumed, so in the callback of `spdk_nvmf_subsystem_resume` we
+	 * need to check whether a quiesce was requested.
+	 */
+	if (vu_ctrlr->queued_quiesce) {
+		SPDK_DEBUGLOG(nvmf_vfio, "%s has queued quiesce event, pause again\n", ctrlr_id(vu_ctrlr));
+		vu_ctrlr->state = VFIO_USER_CTRLR_PAUSING;
+		ret = spdk_nvmf_subsystem_pause((struct spdk_nvmf_subsystem *)endpoint->subsystem, 0,
+						vfio_user_dev_quiesce_done, vu_ctrlr);
+		if (ret < 0) {
+			vu_ctrlr->state = VFIO_USER_CTRLR_RUNNING;
+			SPDK_ERRLOG("%s: failed to pause, ret=%d\n", endpoint_id(endpoint), ret);
+		}
+	}
+}
+
+static void
+vfio_user_dev_quiesce_done(struct spdk_nvmf_subsystem *subsystem,
+			   void *cb_arg, int status)
+{
+	struct nvmf_vfio_user_ctrlr *vu_ctrlr = cb_arg;
+	struct nvmf_vfio_user_endpoint *endpoint = vu_ctrlr->endpoint;
+	int ret;
+
+	SPDK_DEBUGLOG(nvmf_vfio, "%s paused done with status %d\n", ctrlr_id(vu_ctrlr), status);
+
+	assert(vu_ctrlr->state == VFIO_USER_CTRLR_PAUSING);
+	vu_ctrlr->state = VFIO_USER_CTRLR_PAUSED;
+	vfu_device_quiesced(endpoint->vfu_ctx, status);
+	vu_ctrlr->queued_quiesce = false;
+
+	SPDK_DEBUGLOG(nvmf_vfio, "%s start to resume\n", ctrlr_id(vu_ctrlr));
+	vu_ctrlr->state = VFIO_USER_CTRLR_RESUMING;
+	ret = spdk_nvmf_subsystem_resume((struct spdk_nvmf_subsystem *)endpoint->subsystem,
+					 vfio_user_dev_quiesce_resume_done, vu_ctrlr);
+	if (ret < 0) {
+		vu_ctrlr->state = VFIO_USER_CTRLR_PAUSED;
+		SPDK_ERRLOG("%s: failed to resume, ret=%d\n", endpoint_id(endpoint), ret);
+	}
+}
+
+static int
+vfio_user_dev_quiesce_cb(vfu_ctx_t *vfu_ctx)
+{
+	struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx);
+	struct nvmf_vfio_user_ctrlr *vu_ctrlr = endpoint->ctrlr;
+	int ret;
+
+	if (!vu_ctrlr) {
+		return 0;
+	}
+
+	/* The NVMf library will destruct the controller when there
+	 * are no connected queue pairs.
+	 */
+	if (!nvmf_subsystem_get_ctrlr((struct spdk_nvmf_subsystem *)endpoint->subsystem,
+				      vu_ctrlr->cntlid)) {
+		return 0;
+	}
+
+	SPDK_DEBUGLOG(nvmf_vfio, "%s starts to quiesce\n", ctrlr_id(vu_ctrlr));
+
+	/* There is no race condition here, as the device quiesce callback
+	 * and nvmf_prop_set_cc() run in the same thread context.
+	 */
+	if (!vu_ctrlr->ctrlr->vcprop.cc.bits.en) {
+		return 0;
+	} else if (!vu_ctrlr->ctrlr->vcprop.csts.bits.rdy) {
+		return 0;
+	} else if (vu_ctrlr->ctrlr->vcprop.csts.bits.shst == SPDK_NVME_SHST_COMPLETE) {
+		return 0;
+	}
+
+	switch (vu_ctrlr->state) {
+	case VFIO_USER_CTRLR_PAUSED:
+		return 0;
+	case VFIO_USER_CTRLR_RUNNING:
+		vu_ctrlr->state = VFIO_USER_CTRLR_PAUSING;
+		ret = spdk_nvmf_subsystem_pause((struct spdk_nvmf_subsystem *)endpoint->subsystem, 0,
+						vfio_user_dev_quiesce_done, vu_ctrlr);
+		if (ret < 0) {
+			vu_ctrlr->state = VFIO_USER_CTRLR_RUNNING;
+			SPDK_ERRLOG("%s: failed to pause, ret=%d\n", endpoint_id(endpoint), ret);
+			return 0;
+		}
+		break;
+	case VFIO_USER_CTRLR_RESUMING:
+		vu_ctrlr->queued_quiesce = true;
+		SPDK_DEBUGLOG(nvmf_vfio, "%s is busy to quiesce, current state %u\n", ctrlr_id(vu_ctrlr),
+			      vu_ctrlr->state);
+		break;
+	default:
+		assert(vu_ctrlr->state != VFIO_USER_CTRLR_PAUSING);
+		break;
+	}
+
+	errno = EBUSY;
+	return -1;
+}
+
 static int
 vfio_user_dev_info_fill(struct nvmf_vfio_user_transport *vu_transport,
 			struct nvmf_vfio_user_endpoint *endpoint)
@@ -2058,6 +2177,8 @@ vfio_user_dev_info_fill(struct nvmf_vfio_user_transport *vu_transport,
 		return ret;
 	}
 
+	vfu_setup_device_quiesce_cb(vfu_ctx, vfio_user_dev_quiesce_cb);
+
 	ret = vfu_realize_ctx(vfu_ctx);
 	if (ret < 0) {
 		SPDK_ERRLOG("vfu_ctx %p failed to realize\n", vfu_ctx);
@@ -1 +1 @@
-Subproject commit 56842967566dcf4b89514949a6e88ba89eeaf268
+Subproject commit 17769cf1af093dfb4b9bc3347ae39324029989ac
@@ -50,6 +50,10 @@ DEFINE_STUB(spdk_nvmf_qpair_disconnect, int, (struct spdk_nvmf_qpair *qpair,
 DEFINE_STUB(spdk_nvmf_subsystem_get_nqn, const char *,
 	    (const struct spdk_nvmf_subsystem *subsystem), NULL);
 DEFINE_STUB(spdk_bdev_get_block_size, uint32_t, (const struct spdk_bdev *bdev), 512);
+DEFINE_STUB(spdk_nvmf_subsystem_pause, int, (struct spdk_nvmf_subsystem *subsystem,
+	    uint32_t nsid, spdk_nvmf_subsystem_state_change_done cb_fn, void *cb_arg), 0);
+DEFINE_STUB(spdk_nvmf_subsystem_resume, int, (struct spdk_nvmf_subsystem *subsystem,
+	    spdk_nvmf_subsystem_state_change_done cb_fn, void *cb_arg), 0);
 DEFINE_STUB_V(nvmf_ctrlr_abort_aer, (struct spdk_nvmf_ctrlr *ctrlr));
 DEFINE_STUB(nvmf_ctrlr_async_event_error_event, int, (struct spdk_nvmf_ctrlr *ctrlr,
 	    union spdk_nvme_async_event_completion event), 0);
@ -58,6 +62,8 @@ DEFINE_STUB(spdk_nvmf_qpair_get_listen_trid, int, (struct spdk_nvmf_qpair *qpair
|
||||
struct spdk_nvme_transport_id *trid), 0);
|
||||
DEFINE_STUB(spdk_nvme_transport_id_compare, int, (const struct spdk_nvme_transport_id *trid1,
|
||||
const struct spdk_nvme_transport_id *trid2), 0);
|
||||
DEFINE_STUB(nvmf_subsystem_get_ctrlr, struct spdk_nvmf_ctrlr *,
|
||||
(struct spdk_nvmf_subsystem *subsystem, uint16_t cntlid), NULL);
|
||||
|
||||
static void *
|
||||
gpa_to_vva(void *prv, uint64_t addr, uint64_t len, int prot)
|
||||
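For context on the unit-test hunks above: DEFINE_STUB comes from SPDK's spdk_internal/mock.h and generates a canned implementation of the named function, so the transport can be unit-tested without linking the real subsystem state machine. A minimal sketch of the pattern, with a hypothetical function name that is not part of this commit:

#include "spdk/nvmf.h"
#include "spdk_internal/mock.h"

/* Hypothetical example of the DEFINE_STUB pattern used above: this
 * expands to a definition of my_subsystem_op() with the given
 * signature that simply returns the canned value 0, so callers in
 * the code under test link and behave deterministically. */
DEFINE_STUB(my_subsystem_op, int, (struct spdk_nvmf_subsystem *subsystem), 0);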