bhyve: Snapshot impovements for 'blockif' backend
When pausing a block I/O device model as part of suspending a VM, wait for all active block I/O requests to finish before saving snapshot data. This avoids having to save information about in-flight requests both in the block_if layer and in storage device models. For the AHCI device model, the queues are now guaranteed to be idle when taking a snapshot, so remove the code to save queue state and rely on the initial state in a resumed VM having all queues already idle. This will also simplify adding NVMe snapshot support in the future. Reviewed by: jhb Sponsored by: vStack Differential Revision: https://reviews.freebsd.org/D26267
This commit is contained in:
parent
3a4256dd86
commit
cd9618bdb2
@ -109,11 +109,9 @@ struct blockif_ctxt {
|
||||
int bc_psectoff;
|
||||
int bc_closing;
|
||||
int bc_paused;
|
||||
int bc_work_count;
|
||||
pthread_t bc_btid[BLOCKIF_NUMTHR];
|
||||
pthread_mutex_t bc_mtx;
|
||||
pthread_cond_t bc_cond;
|
||||
pthread_cond_t bc_paused_cond;
|
||||
pthread_cond_t bc_work_done_cond;
|
||||
blockif_resize_cb *bc_resize_cb;
|
||||
void *bc_resize_cb_arg;
|
||||
@ -362,6 +360,12 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
|
||||
(*br->br_callback)(br, err);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
blockif_empty(const struct blockif_ctxt *bc)
|
||||
{
|
||||
return (TAILQ_EMPTY(&bc->bc_pendq) && TAILQ_EMPTY(&bc->bc_busyq));
|
||||
}
|
||||
|
||||
static void *
|
||||
blockif_thr(void *arg)
|
||||
{
|
||||
@ -379,30 +383,21 @@ blockif_thr(void *arg)
|
||||
|
||||
pthread_mutex_lock(&bc->bc_mtx);
|
||||
for (;;) {
|
||||
bc->bc_work_count++;
|
||||
|
||||
/* We cannot process work if the interface is paused */
|
||||
while (!bc->bc_paused && blockif_dequeue(bc, t, &be)) {
|
||||
while (blockif_dequeue(bc, t, &be)) {
|
||||
pthread_mutex_unlock(&bc->bc_mtx);
|
||||
blockif_proc(bc, be, buf);
|
||||
pthread_mutex_lock(&bc->bc_mtx);
|
||||
blockif_complete(bc, be);
|
||||
}
|
||||
|
||||
bc->bc_work_count--;
|
||||
|
||||
/* If none of the workers are busy, notify the main thread */
|
||||
if (bc->bc_work_count == 0)
|
||||
/* If none to work, notify the main thread */
|
||||
if (blockif_empty(bc))
|
||||
pthread_cond_broadcast(&bc->bc_work_done_cond);
|
||||
|
||||
/* Check ctxt status here to see if exit requested */
|
||||
if (bc->bc_closing)
|
||||
break;
|
||||
|
||||
/* Make all worker threads wait here if the device is paused */
|
||||
while (bc->bc_paused)
|
||||
pthread_cond_wait(&bc->bc_paused_cond, &bc->bc_mtx);
|
||||
|
||||
pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
|
||||
}
|
||||
pthread_mutex_unlock(&bc->bc_mtx);
|
||||
@ -638,8 +633,6 @@ blockif_open(nvlist_t *nvl, const char *ident)
|
||||
pthread_mutex_init(&bc->bc_mtx, NULL);
|
||||
pthread_cond_init(&bc->bc_cond, NULL);
|
||||
bc->bc_paused = 0;
|
||||
bc->bc_work_count = 0;
|
||||
pthread_cond_init(&bc->bc_paused_cond, NULL);
|
||||
pthread_cond_init(&bc->bc_work_done_cond, NULL);
|
||||
TAILQ_INIT(&bc->bc_freeq);
|
||||
TAILQ_INIT(&bc->bc_pendq);
|
||||
@ -737,6 +730,7 @@ blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq,
|
||||
err = 0;
|
||||
|
||||
pthread_mutex_lock(&bc->bc_mtx);
|
||||
assert(!bc->bc_paused);
|
||||
if (!TAILQ_EMPTY(&bc->bc_freeq)) {
|
||||
/*
|
||||
* Enqueue and inform the block i/o thread
|
||||
@ -761,7 +755,6 @@ blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq,
|
||||
int
|
||||
blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq)
|
||||
{
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
return (blockif_request(bc, breq, BOP_READ));
|
||||
}
|
||||
@ -769,7 +762,6 @@ blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq)
|
||||
int
|
||||
blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq)
|
||||
{
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
return (blockif_request(bc, breq, BOP_WRITE));
|
||||
}
|
||||
@ -777,7 +769,6 @@ blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq)
|
||||
int
|
||||
blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
|
||||
{
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
return (blockif_request(bc, breq, BOP_FLUSH));
|
||||
}
|
||||
@ -785,7 +776,6 @@ blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
|
||||
int
|
||||
blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq)
|
||||
{
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
return (blockif_request(bc, breq, BOP_DELETE));
|
||||
}
|
||||
@ -955,7 +945,6 @@ blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s)
|
||||
off_t
|
||||
blockif_size(struct blockif_ctxt *bc)
|
||||
{
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
return (bc->bc_size);
|
||||
}
|
||||
@ -963,7 +952,6 @@ blockif_size(struct blockif_ctxt *bc)
|
||||
int
|
||||
blockif_sectsz(struct blockif_ctxt *bc)
|
||||
{
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
return (bc->bc_sectsz);
|
||||
}
|
||||
@ -971,7 +959,6 @@ blockif_sectsz(struct blockif_ctxt *bc)
|
||||
void
|
||||
blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off)
|
||||
{
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
*size = bc->bc_psectsz;
|
||||
*off = bc->bc_psectoff;
|
||||
@ -980,7 +967,6 @@ blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off)
|
||||
int
|
||||
blockif_queuesz(struct blockif_ctxt *bc)
|
||||
{
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
return (BLOCKIF_MAXREQ - 1);
|
||||
}
|
||||
@ -988,7 +974,6 @@ blockif_queuesz(struct blockif_ctxt *bc)
|
||||
int
|
||||
blockif_is_ro(struct blockif_ctxt *bc)
|
||||
{
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
return (bc->bc_rdonly);
|
||||
}
|
||||
@ -996,7 +981,6 @@ blockif_is_ro(struct blockif_ctxt *bc)
|
||||
int
|
||||
blockif_candelete(struct blockif_ctxt *bc)
|
||||
{
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
return (bc->bc_candelete);
|
||||
}
|
||||
@ -1012,7 +996,7 @@ blockif_pause(struct blockif_ctxt *bc)
|
||||
bc->bc_paused = 1;
|
||||
|
||||
/* The interface is paused. Wait for workers to finish their work */
|
||||
while (bc->bc_work_count)
|
||||
while (!blockif_empty(bc))
|
||||
pthread_cond_wait(&bc->bc_work_done_cond, &bc->bc_mtx);
|
||||
pthread_mutex_unlock(&bc->bc_mtx);
|
||||
|
||||
@ -1029,71 +1013,6 @@ blockif_resume(struct blockif_ctxt *bc)
|
||||
|
||||
pthread_mutex_lock(&bc->bc_mtx);
|
||||
bc->bc_paused = 0;
|
||||
/* resume the threads waiting for paused */
|
||||
pthread_cond_broadcast(&bc->bc_paused_cond);
|
||||
/* kick the threads after restore */
|
||||
pthread_cond_broadcast(&bc->bc_cond);
|
||||
pthread_mutex_unlock(&bc->bc_mtx);
|
||||
}
|
||||
|
||||
int
|
||||
blockif_snapshot_req(struct blockif_req *br, struct vm_snapshot_meta *meta)
|
||||
{
|
||||
int i;
|
||||
struct iovec *iov;
|
||||
int ret;
|
||||
|
||||
SNAPSHOT_VAR_OR_LEAVE(br->br_iovcnt, meta, ret, done);
|
||||
SNAPSHOT_VAR_OR_LEAVE(br->br_offset, meta, ret, done);
|
||||
SNAPSHOT_VAR_OR_LEAVE(br->br_resid, meta, ret, done);
|
||||
|
||||
/*
|
||||
* XXX: The callback and parameter must be filled by the virtualized
|
||||
* device that uses the interface, during its init; we're not touching
|
||||
* them here.
|
||||
*/
|
||||
|
||||
/* Snapshot the iovecs. */
|
||||
for (i = 0; i < br->br_iovcnt; i++) {
|
||||
iov = &br->br_iov[i];
|
||||
|
||||
SNAPSHOT_VAR_OR_LEAVE(iov->iov_len, meta, ret, done);
|
||||
|
||||
/* We assume the iov is a guest-mapped address. */
|
||||
SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(iov->iov_base, iov->iov_len,
|
||||
false, meta, ret, done);
|
||||
}
|
||||
|
||||
done:
|
||||
return (ret);
|
||||
}
|
||||
|
||||
int
|
||||
blockif_snapshot(struct blockif_ctxt *bc, struct vm_snapshot_meta *meta)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (bc->bc_paused == 0) {
|
||||
fprintf(stderr, "%s: Snapshot failed: "
|
||||
"interface not paused.\r\n", __func__);
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&bc->bc_mtx);
|
||||
|
||||
SNAPSHOT_VAR_OR_LEAVE(bc->bc_magic, meta, ret, done);
|
||||
SNAPSHOT_VAR_OR_LEAVE(bc->bc_ischr, meta, ret, done);
|
||||
SNAPSHOT_VAR_OR_LEAVE(bc->bc_isgeom, meta, ret, done);
|
||||
SNAPSHOT_VAR_OR_LEAVE(bc->bc_candelete, meta, ret, done);
|
||||
SNAPSHOT_VAR_OR_LEAVE(bc->bc_rdonly, meta, ret, done);
|
||||
SNAPSHOT_VAR_OR_LEAVE(bc->bc_size, meta, ret, done);
|
||||
SNAPSHOT_VAR_OR_LEAVE(bc->bc_sectsz, meta, ret, done);
|
||||
SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectsz, meta, ret, done);
|
||||
SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectoff, meta, ret, done);
|
||||
SNAPSHOT_VAR_OR_LEAVE(bc->bc_closing, meta, ret, done);
|
||||
|
||||
done:
|
||||
pthread_mutex_unlock(&bc->bc_mtx);
|
||||
return (ret);
|
||||
}
|
||||
#endif
|
||||
#endif /* BHYVE_SNAPSHOT */
|
||||
|
@ -87,10 +87,6 @@ int blockif_close(struct blockif_ctxt *bc);
|
||||
#ifdef BHYVE_SNAPSHOT
|
||||
void blockif_pause(struct blockif_ctxt *bc);
|
||||
void blockif_resume(struct blockif_ctxt *bc);
|
||||
int blockif_snapshot_req(struct blockif_req *br,
|
||||
struct vm_snapshot_meta *meta);
|
||||
int blockif_snapshot(struct blockif_ctxt *bc,
|
||||
struct vm_snapshot_meta *meta);
|
||||
#endif
|
||||
|
||||
#endif /* _BLOCK_IF_H_ */
|
||||
|
@ -2562,114 +2562,14 @@ pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl)
|
||||
}
|
||||
|
||||
#ifdef BHYVE_SNAPSHOT
|
||||
static int
|
||||
pci_ahci_snapshot_save_queues(struct ahci_port *port,
|
||||
struct vm_snapshot_meta *meta)
|
||||
{
|
||||
int ret;
|
||||
int idx;
|
||||
struct ahci_ioreq *ioreq;
|
||||
|
||||
STAILQ_FOREACH(ioreq, &port->iofhd, io_flist) {
|
||||
idx = ((void *) ioreq - (void *) port->ioreq) / sizeof(*ioreq);
|
||||
SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
|
||||
}
|
||||
|
||||
idx = -1;
|
||||
SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
|
||||
|
||||
TAILQ_FOREACH(ioreq, &port->iobhd, io_blist) {
|
||||
idx = ((void *) ioreq - (void *) port->ioreq) / sizeof(*ioreq);
|
||||
SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
|
||||
|
||||
/*
|
||||
* Snapshot only the busy requests; other requests are
|
||||
* not valid.
|
||||
*/
|
||||
ret = blockif_snapshot_req(&ioreq->io_req, meta);
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "%s: failed to snapshot req\r\n",
|
||||
__func__);
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
idx = -1;
|
||||
SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
|
||||
|
||||
done:
|
||||
return (ret);
|
||||
}
|
||||
|
||||
static int
|
||||
pci_ahci_snapshot_restore_queues(struct ahci_port *port,
|
||||
struct vm_snapshot_meta *meta)
|
||||
{
|
||||
int ret;
|
||||
int idx;
|
||||
struct ahci_ioreq *ioreq;
|
||||
|
||||
/* Empty the free queue before restoring. */
|
||||
while (!STAILQ_EMPTY(&port->iofhd))
|
||||
STAILQ_REMOVE_HEAD(&port->iofhd, io_flist);
|
||||
|
||||
/* Restore the free queue. */
|
||||
while (1) {
|
||||
SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
|
||||
if (idx == -1)
|
||||
break;
|
||||
|
||||
STAILQ_INSERT_TAIL(&port->iofhd, &port->ioreq[idx], io_flist);
|
||||
}
|
||||
|
||||
/* Restore the busy queue. */
|
||||
while (1) {
|
||||
SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
|
||||
if (idx == -1)
|
||||
break;
|
||||
|
||||
ioreq = &port->ioreq[idx];
|
||||
TAILQ_INSERT_TAIL(&port->iobhd, ioreq, io_blist);
|
||||
|
||||
/*
|
||||
* Restore only the busy requests; other requests are
|
||||
* not valid.
|
||||
*/
|
||||
ret = blockif_snapshot_req(&ioreq->io_req, meta);
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "%s: failed to restore request\r\n",
|
||||
__func__);
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Re-enqueue the requests in the block interface. */
|
||||
if (ioreq->readop)
|
||||
ret = blockif_read(port->bctx, &ioreq->io_req);
|
||||
else
|
||||
ret = blockif_write(port->bctx, &ioreq->io_req);
|
||||
|
||||
if (ret != 0) {
|
||||
fprintf(stderr,
|
||||
"%s: failed to re-enqueue request\r\n",
|
||||
__func__);
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
done:
|
||||
return (ret);
|
||||
}
|
||||
|
||||
static int
|
||||
pci_ahci_snapshot(struct vm_snapshot_meta *meta)
|
||||
{
|
||||
int i, j, ret;
|
||||
int i, ret;
|
||||
void *bctx;
|
||||
struct pci_devinst *pi;
|
||||
struct pci_ahci_softc *sc;
|
||||
struct ahci_port *port;
|
||||
struct ahci_cmd_hdr *hdr;
|
||||
struct ahci_ioreq *ioreq;
|
||||
|
||||
pi = meta->dev_data;
|
||||
sc = pi->pi_arg;
|
||||
@ -2753,43 +2653,7 @@ pci_ahci_snapshot(struct vm_snapshot_meta *meta)
|
||||
SNAPSHOT_VAR_OR_LEAVE(port->fbs, meta, ret, done);
|
||||
SNAPSHOT_VAR_OR_LEAVE(port->ioqsz, meta, ret, done);
|
||||
|
||||
for (j = 0; j < port->ioqsz; j++) {
|
||||
ioreq = &port->ioreq[j];
|
||||
|
||||
/* blockif_req snapshot done only for busy requests. */
|
||||
hdr = (struct ahci_cmd_hdr *)(port->cmd_lst +
|
||||
ioreq->slot * AHCI_CL_SIZE);
|
||||
SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(ioreq->cfis,
|
||||
0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry),
|
||||
false, meta, ret, done);
|
||||
|
||||
SNAPSHOT_VAR_OR_LEAVE(ioreq->len, meta, ret, done);
|
||||
SNAPSHOT_VAR_OR_LEAVE(ioreq->done, meta, ret, done);
|
||||
SNAPSHOT_VAR_OR_LEAVE(ioreq->slot, meta, ret, done);
|
||||
SNAPSHOT_VAR_OR_LEAVE(ioreq->more, meta, ret, done);
|
||||
SNAPSHOT_VAR_OR_LEAVE(ioreq->readop, meta, ret, done);
|
||||
}
|
||||
|
||||
/* Perform save / restore specific operations. */
|
||||
if (meta->op == VM_SNAPSHOT_SAVE) {
|
||||
ret = pci_ahci_snapshot_save_queues(port, meta);
|
||||
if (ret != 0)
|
||||
goto done;
|
||||
} else if (meta->op == VM_SNAPSHOT_RESTORE) {
|
||||
ret = pci_ahci_snapshot_restore_queues(port, meta);
|
||||
if (ret != 0)
|
||||
goto done;
|
||||
} else {
|
||||
ret = EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
ret = blockif_snapshot(port->bctx, meta);
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "%s: failed to restore blockif\r\n",
|
||||
__func__);
|
||||
goto done;
|
||||
}
|
||||
assert(TAILQ_EMPTY(&port->iobhd));
|
||||
}
|
||||
|
||||
done:
|
||||
@ -2835,7 +2699,7 @@ pci_ahci_resume(struct vmctx *ctx, struct pci_devinst *pi)
|
||||
|
||||
return (0);
|
||||
}
|
||||
#endif
|
||||
#endif /* BHYVE_SNAPSHOT */
|
||||
|
||||
/*
|
||||
* Use separate emulation names to distinguish drive and atapi devices
|
||||
|
Loading…
Reference in New Issue
Block a user