621b509048
Replace the existing ad-hoc configuration via various global variables with a small database of key-value pairs. The database supports hierarchical keys using a MIB-like syntax to name the path to a given key. Values are always stored as strings. The API used to manage configuration values does include wrappers for handling boolean values. Other values that use non-string types require parsing by consumers. The configuration values are stored in a tree using nvlists. Leaf nodes hold string values. Configuration values are permitted to reference other configuration values using '%(name)'. This permits constructing template configurations. All existing command line arguments now set configuration values. For devices, the "-s" option parses its option argument to generate a list of key-value pairs for the given device. A new '-o' command line option permits setting an individual configuration variable. The key name is always given as a full path of dot-separated components. A new '-k' command line option parses a simple configuration file. This configuration file holds a flat list of 'key=value' lines where the 'key' is the full path of a configuration variable. Lines starting with a '#' are comments. In general, bhyve starts by parsing command line options in sequence and applying those settings to configuration values. Once this is complete, bhyve then begins initializing its state based on the configuration values. This means that subsequent configuration options or files may override or supplement previously given settings. A special 'config.dump' configuration value can be set to true to help debug configuration issues. When this value is set, bhyve will print out the configuration variables as a flat list of 'key=value' lines. Most command line arguments map to a single configuration variable, e.g. '-w' sets the 'x86.strictmsr' value to false. 
A few command line arguments have less obvious effects: - Multiple '-p' options append their values (as a comma-separated list) to "vcpu.N.cpuset" values (where N is a decimal vcpu number). - For '-s' options, a pci.<bus>.<slot>.<function> node is created. The first argument to '-s' (the device type) is used as the value of a "device" variable. Additional comma-separated arguments are then parsed into 'key=value' pairs and used to set additional variables under the device node. A PCI device emulation driver can provide its own hook to override the parsing of the additional '-s' arguments after the device type. After the configuration phase has completed, the init_pci hook then walks the "pci.<bus>.<slot>.<func>" nodes. It uses the "device" value to find the device model to use. The device model's init routine is passed a reference to its nvlist node in the configuration tree which it can query for specific variables. The result is that a lot of the string parsing is removed from the device models and centralized. In addition, adding a new variable just requires teaching the model to look for the new variable. - For '-l' options, a similar model is used where the string is parsed into values that are later read during initialization. One key note here is that the serial ports use the commonly used lowercase names from existing documentation and examples (e.g. "lpc.com1") instead of the uppercase names previously used internally in bhyve. Reviewed by: grehan MFC after: 3 months Differential Revision: https://reviews.freebsd.org/D26035
491 lines
9.8 KiB
C
491 lines
9.8 KiB
C
/*-
|
|
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
|
|
*
|
|
* Copyright (c) 2011 NetApp, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* $FreeBSD$
|
|
*/
|
|
|
|
/*
|
|
* Micro event library for FreeBSD, designed for a single i/o thread
|
|
* using kqueue, and having events be persistent by default.
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <assert.h>
#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>

#include <sys/types.h>
#ifndef WITHOUT_CAPSICUM
#include <sys/capsicum.h>
#endif
#include <sys/event.h>
#include <sys/time.h>

#include <pthread.h>
#include <pthread_np.h>

#include "mevent.h"
|
|
|
|
/*
 * Number of entries in the kevent change and event arrays used by
 * mevent_dispatch(); also the size of the scratch buffer used to drain
 * the notification pipe.
 */
#define	MEVENT_MAX	64

/* Thread that runs mevent_dispatch(); recorded when the dispatcher starts. */
static pthread_t mevent_tid;

/*
 * Next kqueue ident handed to a timer event; incremented for every timer
 * that is added.  NOTE(review): the starting value looks arbitrary.
 */
static int mevent_timid = 43;

/*
 * Wakeup pipe created by mevent_dispatch(): [0] is watched by the
 * dispatcher, [1] is written by mevent_notify().
 */
static int mevent_pipefd[2];

/* Guards the event lists and the per-event bookkeeping fields. */
static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER;
|
|
|
|
struct mevent {
|
|
void (*me_func)(int, enum ev_type, void *);
|
|
#define me_msecs me_fd
|
|
int me_fd;
|
|
int me_timid;
|
|
enum ev_type me_type;
|
|
void *me_param;
|
|
int me_cq;
|
|
int me_state; /* Desired kevent flags. */
|
|
int me_closefd;
|
|
LIST_ENTRY(mevent) me_list;
|
|
};
|
|
|
|
/*
 * global_head holds events whose state has already been pushed to kqueue;
 * change_head holds events with an add/update/delete still to be applied
 * by mevent_build().
 */
static LIST_HEAD(listhead, mevent) global_head;
static struct listhead change_head;
|
|
|
|
static void
|
|
mevent_qlock(void)
|
|
{
|
|
pthread_mutex_lock(&mevent_lmutex);
|
|
}
|
|
|
|
static void
|
|
mevent_qunlock(void)
|
|
{
|
|
pthread_mutex_unlock(&mevent_lmutex);
|
|
}
|
|
|
|
static void
|
|
mevent_pipe_read(int fd, enum ev_type type, void *param)
|
|
{
|
|
char buf[MEVENT_MAX];
|
|
int status;
|
|
|
|
/*
|
|
* Drain the pipe read side. The fd is non-blocking so this is
|
|
* safe to do.
|
|
*/
|
|
do {
|
|
status = read(fd, buf, sizeof(buf));
|
|
} while (status == MEVENT_MAX);
|
|
}
|
|
|
|
static void
|
|
mevent_notify(void)
|
|
{
|
|
char c = '\0';
|
|
|
|
/*
|
|
* If calling from outside the i/o thread, write a byte on the
|
|
* pipe to force the i/o thread to exit the blocking kevent call.
|
|
*/
|
|
if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) {
|
|
write(mevent_pipefd[1], &c, 1);
|
|
}
|
|
}
|
|
|
|
/*
 * Translate the event's mevent type into the matching kqueue filter.
 * Returns 0 for a type that has no mapping.
 */
static int
mevent_kq_filter(struct mevent *mevp)
{

	switch (mevp->me_type) {
	case EVF_READ:
		return (EVFILT_READ);
	case EVF_WRITE:
		return (EVFILT_WRITE);
	case EVF_TIMER:
		return (EVFILT_TIMER);
	case EVF_SIGNAL:
		return (EVFILT_SIGNAL);
	default:
		return (0);
	}
}
|
|
|
|
static int
|
|
mevent_kq_flags(struct mevent *mevp)
|
|
{
|
|
return (mevp->me_state);
|
|
}
|
|
|
|
/*
 * Filter-specific flags to submit for this event.  No event type uses
 * any at the moment, so the argument is ignored and 0 is returned.
 */
static int
mevent_kq_fflags(struct mevent *mevp)
{

	(void)mevp;

	/* XXX nothing yet, perhaps EV_EOF for reads ? */
	return (0);
}
|
|
|
|
/*
 * Turn the pending change list into an array of kevent changes.
 *
 * Every entry on change_head is taken off the list.  Entries flagged
 * me_closefd have their descriptor closed instead of producing a kevent
 * change; all others fill the next slot of 'kev'.  Entries marked
 * EV_DELETE are freed, the rest move to global_head.
 *
 * mfd - kqueue descriptor (currently unused)
 * kev - output array with room for MEVENT_MAX entries
 *
 * Returns the number of slots of 'kev' that were filled in.
 */
static int
mevent_build(int mfd, struct kevent *kev)
{
	struct mevent *mevp, *tmpp;
	int data, i, ident;

	(void)mfd;

	i = 0;

	mevent_qlock();

	LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
		if (mevp->me_closefd) {
			/*
			 * A close of the file descriptor will remove the
			 * event
			 */
			close(mevp->me_fd);
		} else {
			/*
			 * Check the bound before storing, so that all
			 * MEVENT_MAX slots are usable and an overflowing
			 * entry is caught before it is written.
			 */
			assert(i < MEVENT_MAX);

			/*
			 * Timers are identified by their private timer id
			 * and carry the period as data; everything else is
			 * identified by its file descriptor.
			 */
			if (mevp->me_type == EVF_TIMER) {
				ident = mevp->me_timid;
				data = mevp->me_msecs;
			} else {
				ident = mevp->me_fd;
				data = 0;
			}

			/*
			 * EV_SET initialises the whole entry rather than
			 * only the members named here.
			 */
			EV_SET(&kev[i], ident, mevent_kq_filter(mevp),
			    mevent_kq_flags(mevp), mevent_kq_fflags(mevp),
			    data, mevp);
			i++;
		}

		mevp->me_cq = 0;
		LIST_REMOVE(mevp, me_list);

		if (mevp->me_state & EV_DELETE) {
			free(mevp);
		} else {
			/*
			 * We need to add the event only once, so we can
			 * reset the EV_ADD bit after it has been propagated
			 * to the kevent() arguments the first time.
			 */
			mevp->me_state &= ~EV_ADD;
			LIST_INSERT_HEAD(&global_head, mevp, me_list);
		}
	}

	mevent_qunlock();

	return (i);
}
|
|
|
|
/*
 * Run the callback of every event that kevent() reported.
 *
 * kev   - array of returned events; udata of each entry is its mevent
 * numev - number of valid entries; nothing is done when it is <= 0
 */
static void
mevent_handle(struct kevent *kev, int numev)
{
	struct mevent *ev;
	int idx;

	idx = 0;
	while (idx < numev) {
		ev = kev[idx].udata;
		idx++;

		/* XXX check for EV_ERROR ? */

		ev->me_func(ev->me_fd, ev->me_type, ev->me_param);
	}
}
|
|
|
|
static struct mevent *
|
|
mevent_add_state(int tfd, enum ev_type type,
|
|
void (*func)(int, enum ev_type, void *), void *param,
|
|
int state)
|
|
{
|
|
struct mevent *lp, *mevp;
|
|
|
|
if (tfd < 0 || func == NULL) {
|
|
return (NULL);
|
|
}
|
|
|
|
mevp = NULL;
|
|
|
|
mevent_qlock();
|
|
|
|
/*
|
|
* Verify that the fd/type tuple is not present in any list
|
|
*/
|
|
LIST_FOREACH(lp, &global_head, me_list) {
|
|
if (type != EVF_TIMER && lp->me_fd == tfd &&
|
|
lp->me_type == type) {
|
|
goto exit;
|
|
}
|
|
}
|
|
|
|
LIST_FOREACH(lp, &change_head, me_list) {
|
|
if (type != EVF_TIMER && lp->me_fd == tfd &&
|
|
lp->me_type == type) {
|
|
goto exit;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Allocate an entry, populate it, and add it to the change list.
|
|
*/
|
|
mevp = calloc(1, sizeof(struct mevent));
|
|
if (mevp == NULL) {
|
|
goto exit;
|
|
}
|
|
|
|
if (type == EVF_TIMER) {
|
|
mevp->me_msecs = tfd;
|
|
mevp->me_timid = mevent_timid++;
|
|
} else
|
|
mevp->me_fd = tfd;
|
|
mevp->me_type = type;
|
|
mevp->me_func = func;
|
|
mevp->me_param = param;
|
|
|
|
LIST_INSERT_HEAD(&change_head, mevp, me_list);
|
|
mevp->me_cq = 1;
|
|
mevp->me_state = state;
|
|
mevent_notify();
|
|
|
|
exit:
|
|
mevent_qunlock();
|
|
|
|
return (mevp);
|
|
}
|
|
|
|
/*
 * Register an event with an initial state of EV_ADD.
 *
 * Arguments and return value are those of mevent_add_state(); NULL is
 * returned when the event could not be created.
 */
struct mevent *
mevent_add(int tfd, enum ev_type type,
    void (*func)(int, enum ev_type, void *), void *param)
{
	struct mevent *ev;

	ev = mevent_add_state(tfd, type, func, param, EV_ADD);
	return (ev);
}
|
|
|
|
/*
 * Register an event with an initial state of EV_ADD | EV_DISABLE.
 *
 * Arguments and return value are those of mevent_add_state(); NULL is
 * returned when the event could not be created.
 */
struct mevent *
mevent_add_disabled(int tfd, enum ev_type type,
    void (*func)(int, enum ev_type, void *), void *param)
{
	struct mevent *ev;

	ev = mevent_add_state(tfd, type, func, param, EV_ADD | EV_DISABLE);
	return (ev);
}
|
|
|
|
/*
 * Switch an event between the enabled and the disabled state.
 *
 * evp    - event to modify; must not be marked for deletion
 * enable - true to enable the event, false to disable it
 *
 * Always returns 0.
 */
static int
mevent_update(struct mevent *evp, bool enable)
{
	int wanted;

	mevent_qlock();

	/*
	 * It's not possible to enable/disable a deleted event
	 */
	assert((evp->me_state & EV_DELETE) == 0);

	/* Replace whichever of the two bits is set with the requested one. */
	wanted = evp->me_state & ~(EV_ENABLE | EV_DISABLE);
	wanted |= enable ? EV_ENABLE : EV_DISABLE;

	/* Nothing to push to kqueue when the state does not change. */
	if (wanted == evp->me_state)
		goto out;

	evp->me_state = wanted;

	/* An entry already on the change list will be picked up as is. */
	if (evp->me_cq != 0)
		goto out;

	evp->me_cq = 1;
	LIST_REMOVE(evp, me_list);
	LIST_INSERT_HEAD(&change_head, evp, me_list);
	mevent_notify();

out:
	mevent_qunlock();

	return (0);
}
|
|
|
|
/*
 * Enable a previously registered event.  Always returns 0.
 */
int
mevent_enable(struct mevent *evp)
{
	const bool enable = true;

	return (mevent_update(evp, enable));
}
|
|
|
|
/*
 * Disable a previously registered event.  Always returns 0.
 */
int
mevent_disable(struct mevent *evp)
{
	const bool enable = false;

	return (mevent_update(evp, enable));
}
|
|
|
|
/*
 * Mark an event for deletion and make sure it is on the change list so
 * that mevent_build() processes it.
 *
 * evp     - event to delete
 * closefd - non-zero to also have the event's descriptor closed when the
 *           deletion is processed
 *
 * Always returns 0.
 */
static int
mevent_delete_event(struct mevent *evp, int closefd)
{

	mevent_qlock();

	/* Any previously requested state is replaced by the delete. */
	evp->me_state = EV_DELETE;
	if (closefd != 0)
		evp->me_closefd = 1;

	/* Move the entry to the change list unless it is already there. */
	if (!evp->me_cq) {
		LIST_REMOVE(evp, me_list);
		LIST_INSERT_HEAD(&change_head, evp, me_list);
		evp->me_cq = 1;
		mevent_notify();
	}

	mevent_qunlock();

	return (0);
}
|
|
|
|
/*
 * Delete an event, leaving its file descriptor open.  Always returns 0.
 */
int
mevent_delete(struct mevent *evp)
{
	const int closefd = 0;

	return (mevent_delete_event(evp, closefd));
}
|
|
|
|
/*
 * Delete an event and have its file descriptor closed when the deletion
 * is processed.  Always returns 0.
 */
int
mevent_delete_close(struct mevent *evp)
{
	const int closefd = 1;

	return (mevent_delete_event(evp, closefd));
}
|
|
|
|
static void
|
|
mevent_set_name(void)
|
|
{
|
|
|
|
pthread_set_name_np(mevent_tid, "mevent");
|
|
}
|
|
|
|
/*
 * Event loop of the i/o thread.  Does not return.
 *
 * Records the calling thread as the i/o thread, creates the kqueue and
 * the wakeup pipe, and then loops forever: pending changes collected by
 * mevent_build() are pushed to the kqueue, the thread blocks until events
 * arrive, and the callbacks of the reported events are run.
 *
 * Failure to create the kqueue or the pipe, or to restrict their
 * capability rights, terminates the process with EX_OSERR.
 */
void
mevent_dispatch(void)
{
	struct kevent changelist[MEVENT_MAX];
	struct kevent eventlist[MEVENT_MAX];
	struct mevent *pipev;
	int flags;
	int mfd;
	int numev;
	int ret;
#ifndef WITHOUT_CAPSICUM
	cap_rights_t rights;
#endif

	mevent_tid = pthread_self();
	mevent_set_name();

	/*
	 * kqueue() reports failure with -1; test for exactly that and do
	 * it unconditionally rather than through an assert.
	 */
	mfd = kqueue();
	if (mfd == -1)
		err(EX_OSERR, "kqueue");

#ifndef WITHOUT_CAPSICUM
	cap_rights_init(&rights, CAP_KQUEUE);
	if (caph_rights_limit(mfd, &rights) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif

	/*
	 * Open the pipe that will be used for other threads to force
	 * the blocking kqueue call to exit by writing to it.
	 */
	ret = pipe(mevent_pipefd);
	if (ret < 0) {
		perror("pipe");
		exit(EX_OSERR);
	}

	/*
	 * Set the read side to non-blocking so that draining it in
	 * mevent_pipe_read() can never stall the event loop.  This has to
	 * happen before the rights on the descriptor are limited below,
	 * since the limited set does not include the fcntl right.
	 */
	flags = fcntl(mevent_pipefd[0], F_GETFL);
	if (flags == -1 ||
	    fcntl(mevent_pipefd[0], F_SETFL, flags | O_NONBLOCK) == -1)
		err(EX_OSERR, "fcntl");

#ifndef WITHOUT_CAPSICUM
	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
	if (caph_rights_limit(mevent_pipefd[0], &rights) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
	if (caph_rights_limit(mevent_pipefd[1], &rights) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif

	/*
	 * Add internal event handler for the pipe read fd
	 */
	pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL);
	assert(pipev != NULL);

	for (;;) {
		/*
		 * Build changelist if required.
		 * XXX the changelist can be put into the blocking call
		 * to eliminate the extra syscall. Currently better for
		 * debug.
		 */
		numev = mevent_build(mfd, changelist);
		if (numev) {
			ret = kevent(mfd, changelist, numev, NULL, 0, NULL);
			if (ret == -1) {
				perror("Error return from kevent change");
			}
		}

		/*
		 * Block awaiting events
		 */
		ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL);
		if (ret == -1) {
			/* An interrupted wait is routine; just go round again. */
			if (errno != EINTR)
				perror("Error return from kevent monitor");
			continue;
		}

		/*
		 * Handle reported events
		 */
		mevent_handle(eventlist, ret);
	}
}
|