John Baldwin 483d953a86 Initial support for bhyve save and restore.
Save and restore (also known as suspend and resume) permits a snapshot
to be taken of a guest's state that can later be resumed.  In the
current implementation, bhyve(8) creates a UNIX domain socket that is
used by bhyvectl(8) to send a request to save a snapshot (and
optionally exit after the snapshot has been taken).  A snapshot
currently consists of two files: the first holds a copy of guest RAM,
and the second file holds other guest state such as vCPU register
values and device model state.

To resume a guest, bhyve(8) must be started with a matching pair of
command line arguments to instantiate the same set of device models as
well as a pointer to the saved snapshot.

While the current implementation is useful for several uses cases, it
has a few limitations.  The file format for saving the guest state is
tied to the ABI of internal bhyve structures and is not
self-describing (in that it does not communicate the set of device
models present in the system).  In addition, the state saved for some
device models closely matches the internal data structures which might
prove a challenge for compatibility of snapshot files across a range
of bhyve versions.  The file format also does not currently support
versioning of individual chunks of state.  As a result, the current
file format is not a fixed binary format and future revisions to save
and restore will break binary compatiblity of snapshot files.  The
goal is to move to a more flexible format that adds versioning,
etc. and at that point to commit to providing a reasonable level of
compatibility.  As a result, the current implementation is not enabled
by default.  It can be enabled via the WITH_BHYVE_SNAPSHOT=yes option
for userland builds, and the kernel option BHYVE_SHAPSHOT.

Submitted by:	Mihai Tiganus, Flavius Anton, Darius Mihai
Submitted by:	Elena Mihailescu, Mihai Carabas, Sergiu Weisz
Relnotes:	yes
Sponsored by:	University Politehnica of Bucharest
Sponsored by:	Matthew Grooms (student scholarships)
Sponsored by:	iXsystems
Differential Revision:	https://reviews.freebsd.org/D19495
2020-05-05 00:02:04 +00:00

122 lines
3.1 KiB
C

/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2014, Neel Natu (neel@freebsd.org)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_bhyve_snapshot.h"
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
#include "vpmtmr.h"
/*
* The ACPI Power Management timer is a free-running 24- or 32-bit
* timer with a frequency of 3.579545MHz
*
* This implementation will be 32-bits
*/
#define PMTMR_FREQ 3579545 /* 3.579545MHz */
struct vpmtmr {
sbintime_t freq_sbt;
sbintime_t baseuptime;
uint32_t baseval;
};
static MALLOC_DEFINE(M_VPMTMR, "vpmtmr", "bhyve virtual acpi timer");
struct vpmtmr *
vpmtmr_init(struct vm *vm)
{
struct vpmtmr *vpmtmr;
struct bintime bt;
vpmtmr = malloc(sizeof(struct vpmtmr), M_VPMTMR, M_WAITOK | M_ZERO);
vpmtmr->baseuptime = sbinuptime();
vpmtmr->baseval = 0;
FREQ2BT(PMTMR_FREQ, &bt);
vpmtmr->freq_sbt = bttosbt(bt);
return (vpmtmr);
}
void
vpmtmr_cleanup(struct vpmtmr *vpmtmr)
{
free(vpmtmr, M_VPMTMR);
}
int
vpmtmr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
uint32_t *val)
{
struct vpmtmr *vpmtmr;
sbintime_t now, delta;
if (!in || bytes != 4)
return (-1);
vpmtmr = vm_pmtmr(vm);
/*
* No locking needed because 'baseuptime' and 'baseval' are
* written only during initialization.
*/
now = sbinuptime();
delta = now - vpmtmr->baseuptime;
KASSERT(delta >= 0, ("vpmtmr_handler: uptime went backwards: "
"%#lx to %#lx", vpmtmr->baseuptime, now));
*val = vpmtmr->baseval + delta / vpmtmr->freq_sbt;
return (0);
}
#ifdef BHYVE_SNAPSHOT
int
vpmtmr_snapshot(struct vpmtmr *vpmtmr, struct vm_snapshot_meta *meta)
{
int ret;
SNAPSHOT_VAR_OR_LEAVE(vpmtmr->baseval, meta, ret, done);
done:
return (ret);
}
#endif