Sync: merge r214077 through r214219 from ^/head.

Dimitry Andric 2010-10-22 20:02:18 +00:00
commit 19fe8e8483
126 changed files with 10079 additions and 4856 deletions

View File

@ -14,6 +14,8 @@
# The file is partitioned: OLD_FILES first, then OLD_LIBS and OLD_DIRS last.
#
# 20101020: catch up with vm_page_sleep_if_busy rename
OLD_FILES+=usr/share/man/man9/vm_page_sleep_busy.9.gz
# 20101011: removed subblock.h from liblzma
OLD_FILES+=usr/include/lzma/subblock.h
# 20101002: removed manpath.config

View File

@ -65,7 +65,7 @@ futx_open(const char *file)
return (fp);
}
static void
static int
utx_active_add(const struct futx *fu)
{
FILE *fp;
@ -78,7 +78,7 @@ utx_active_add(const struct futx *fu)
*/
fp = futx_open(_PATH_UTX_ACTIVE);
if (fp == NULL)
return;
return (1);
while (fread(&fe, sizeof fe, 1, fp) == 1) {
switch (fe.fu_type) {
case USER_PROCESS:
@ -110,6 +110,7 @@ utx_active_add(const struct futx *fu)
exact:
fwrite(fu, sizeof *fu, 1, fp);
fclose(fp);
return (0);
}
static int
@ -123,7 +124,7 @@ utx_active_remove(struct futx *fu)
*/
fp = futx_open(_PATH_UTX_ACTIVE);
if (fp == NULL)
return (0);
return (1);
while (fread(&fe, sizeof fe, 1, fp) == 1) {
switch (fe.fu_type) {
case USER_PROCESS:
@ -151,7 +152,7 @@ utx_active_purge(void)
truncate(_PATH_UTX_ACTIVE, 0);
}
static void
static int
utx_lastlogin_add(const struct futx *fu)
{
FILE *fp;
@ -164,7 +165,7 @@ utx_lastlogin_add(const struct futx *fu)
*/
fp = futx_open(_PATH_UTX_LASTLOGIN);
if (fp == NULL)
return;
return (1);
while (fread(&fe, sizeof fe, 1, fp) == 1) {
if (strncmp(fu->fu_user, fe.fu_user, sizeof fe.fu_user) != 0)
continue;
@ -175,6 +176,7 @@ utx_lastlogin_add(const struct futx *fu)
}
fwrite(fu, sizeof *fu, 1, fp);
fclose(fp);
return (0);
}
static void
@ -197,7 +199,7 @@ utx_lastlogin_upgrade(void)
_close(fd);
}
static void
static int
utx_log_add(const struct futx *fu)
{
int fd;
@ -219,15 +221,17 @@ utx_log_add(const struct futx *fu)
fd = _open(_PATH_UTX_LOG, O_CREAT|O_WRONLY|O_APPEND, 0644);
if (fd < 0)
return;
return (1);
_writev(fd, vec, 2);
_close(fd);
return (0);
}
struct utmpx *
pututxline(const struct utmpx *utmpx)
{
struct futx fu;
int bad = 0;
utx_to_futx(utmpx, &fu);
@ -241,16 +245,21 @@ pututxline(const struct utmpx *utmpx)
case NEW_TIME:
break;
case USER_PROCESS:
utx_active_add(&fu);
utx_lastlogin_add(&fu);
bad |= utx_active_add(&fu);
bad |= utx_lastlogin_add(&fu);
break;
#if 0 /* XXX: Are these records of any use to us? */
case INIT_PROCESS:
case LOGIN_PROCESS:
utx_active_add(&fu);
bad |= utx_active_add(&fu);
break;
#endif
case DEAD_PROCESS:
/*
* In case writing a logout entry fails, never attempt
* to write it to utx.log. The logout entry's ut_id
* might be invalid.
*/
if (utx_active_remove(&fu) != 0)
return (NULL);
break;
@ -258,6 +267,6 @@ pututxline(const struct utmpx *utmpx)
return (NULL);
}
utx_log_add(&fu);
return (futx_to_utx(&fu));
bad |= utx_log_add(&fu);
return (bad ? NULL : futx_to_utx(&fu));
}
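The net effect of this hunk is that pututxline() now reports partial failures: each helper returns non-zero on error, the results are OR-ed into bad, and a NULL return tells the caller that at least one utmpx database was not updated. A minimal caller sketch (the record_login wrapper is hypothetical):

#include <err.h>
#include <utmpx.h>

/* Hypothetical wrapper: check the NULL return this change introduces. */
static void
record_login(const struct utmpx *ut)
{
	if (pututxline(ut) == NULL)
		warnx("pututxline: not all utmpx databases were updated");
}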

View File

@ -177,10 +177,8 @@
#define pthread_rwlock_unlock _pthread_rwlock_unlock
#define pthread_rwlock_wrlock _pthread_rwlock_wrlock
#define pthread_rwlockattr_destroy _pthread_rwlockattr_destroy
#define pthread_rwlockattr_getkind_np _pthread_rwlockattr_getkind_np
#define pthread_rwlockattr_getpshared _pthread_rwlockattr_getpshared
#define pthread_rwlockattr_init _pthread_rwlockattr_init
#define pthread_rwlockattr_setkind_np _pthread_rwlockattr_setkind_np
#define pthread_rwlockattr_setpshared _pthread_rwlockattr_setpshared
#define pthread_self _pthread_self
#define pthread_set_name_np _pthread_set_name_np

View File

@ -158,10 +158,8 @@
#undef pthread_rwlock_unlock
#undef pthread_rwlock_wrlock
#undef pthread_rwlockattr_destroy
#undef pthread_rwlockattr_getkind_np
#undef pthread_rwlockattr_getpshared
#undef pthread_rwlockattr_init
#undef pthread_rwlockattr_setkind_np
#undef pthread_rwlockattr_setpshared
#undef pthread_self
#undef pthread_set_name_np

View File

@ -12,7 +12,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
.\" $OpenBSD: strtonum.3,v 1.12 2005/10/26 11:37:58 jmc Exp $
.\" $OpenBSD: strtonum.3,v 1.13 2006/04/25 05:15:42 tedu Exp $
.\" $FreeBSD$
.\"
.Dd April 29, 2004
@ -23,7 +23,6 @@
.Nd "reliably convert string value to an integer"
.Sh SYNOPSIS
.In stdlib.h
.In limits.h
.Ft long long
.Fo strtonum
.Fa "const char *nptr"

View File

@ -129,10 +129,10 @@ or
.Dv PRIO_USER .
.El
.Pp
.Bl -tag -width Er
In addition to the errors indicated above,
.Fn setpriority
will fail if:
.Bl -tag -width Er
.It Bq Er EPERM
A process was located, but neither its effective nor real user
ID matched the effective user ID of the caller.

View File

@ -89,7 +89,6 @@ returns information about the file the link references.
The available values are as follows:
.Pp
.Bl -tag -width 6n
.Pp
.It Li _PC_LINK_MAX
The maximum file link count.
.It Li _PC_MAX_CANON
@ -234,11 +233,11 @@ Too many symbolic links were encountered in translating the pathname.
An I/O error occurred while reading from or writing to the file system.
.El
.Pp
.Bl -tag -width Er
The
.Fn fpathconf
system call
will fail if:
.Bl -tag -width Er
.It Bq Er EBADF
The
.Fa fd

View File

@ -345,10 +345,10 @@ represented correctly in the structure pointed to by
.Fa sb .
.El
.Pp
.Bl -tag -width Er
The
.Fn fstat
system call will fail if:
.Bl -tag -width Er
.It Bq Er EBADF
The
.Fa fd

View File

@ -54,7 +54,7 @@ static char nomemmsg[] = "Could not allocate memory";
void
gctl_dump(struct gctl_req *req, FILE *f)
{
u_int i;
unsigned int i;
int j;
struct gctl_req_arg *ap;
@ -126,10 +126,8 @@ gctl_check_alloc(struct gctl_req *req, void *ptr)
struct gctl_req *
gctl_get_handle(void)
{
struct gctl_req *rp;
rp = calloc(1, sizeof *rp);
return (rp);
return (calloc(1, sizeof(struct gctl_req)));
}
/*
@ -152,33 +150,9 @@ gctl_new_arg(struct gctl_req *req)
return (ap);
}
void
gctl_ro_param(struct gctl_req *req, const char *name, int len, const void* value)
{
struct gctl_req_arg *ap;
if (req == NULL || req->error != NULL)
return;
ap = gctl_new_arg(req);
if (ap == NULL)
return;
ap->name = strdup(name);
gctl_check_alloc(req, ap->name);
if (ap->name == NULL)
return;
ap->nlen = strlen(ap->name) + 1;
ap->value = __DECONST(void *, value);
ap->flag = GCTL_PARAM_RD;
if (len >= 0)
ap->len = len;
else if (len < 0) {
ap->flag |= GCTL_PARAM_ASCII;
ap->len = strlen(value) + 1;
}
}
void
gctl_rw_param(struct gctl_req *req, const char *name, int len, void* value)
static void
gctl_param_add(struct gctl_req *req, const char *name, int len, void *value,
int flag)
{
struct gctl_req_arg *ap;
@ -193,11 +167,27 @@ gctl_rw_param(struct gctl_req *req, const char *name, int len, void* value)
return;
ap->nlen = strlen(ap->name) + 1;
ap->value = value;
ap->flag = GCTL_PARAM_RW;
ap->flag = flag;
if (len >= 0)
ap->len = len;
else if (len < 0)
else if (len < 0) {
ap->flag |= GCTL_PARAM_ASCII;
ap->len = strlen(value) + 1;
}
}
void
gctl_ro_param(struct gctl_req *req, const char *name, int len, const void* value)
{
gctl_param_add(req, name, len, __DECONST(void *, value), GCTL_PARAM_RD);
}
void
gctl_rw_param(struct gctl_req *req, const char *name, int len, void *value)
{
gctl_param_add(req, name, len, value, GCTL_PARAM_RW);
}
const char *
@ -233,7 +223,7 @@ gctl_issue(struct gctl_req *req)
void
gctl_free(struct gctl_req *req)
{
u_int i;
unsigned int i;
if (req == NULL)
return;
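After this refactoring both public wrappers feed gctl_param_add() with a flag, and a negative length still marks the value as an ASCII string whose length is computed with strlen(). A hedged usage sketch (the "class"/"verb" parameter names follow the usual GEOM control convention and are assumptions here):

#include <stdio.h>
#include <libgeom.h>

static int
gctl_example(void)
{
	struct gctl_req *req;
	const char *errstr;

	req = gctl_get_handle();
	gctl_ro_param(req, "class", -1, "ELI");	/* len < 0: ASCII string */
	gctl_ro_param(req, "verb", -1, "list");
	errstr = gctl_issue(req);
	if (errstr != NULL)
		fprintf(stderr, "gctl_issue: %s\n", errstr);
	gctl_free(req);
	return (errstr == NULL ? 0 : 1);
}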

View File

@ -318,9 +318,7 @@ FBSDprivate_1.0 {
_pthread_rwlock_wrlock;
_pthread_rwlockattr_destroy;
_pthread_rwlockattr_getpshared;
_pthread_rwlockattr_getkind_np;
_pthread_rwlockattr_init;
_pthread_rwlockattr_setkind_np;
_pthread_rwlockattr_setpshared;
_pthread_self;
_pthread_set_name_np;
@ -403,8 +401,6 @@ FBSD_1.1 {
FBSD_1.2 {
openat;
pthread_rwlockattr_getkind_np;
pthread_rwlockattr_setkind_np;
setcontext;
swapcontext;
};

View File

@ -285,14 +285,11 @@ struct pthread_prio {
struct pthread_rwlockattr {
int pshared;
int kind;
};
struct pthread_rwlock {
struct urwlock lock;
struct pthread *owner;
int recurse;
int kind;
};
/*

View File

@ -63,19 +63,13 @@ __weak_reference(_pthread_rwlock_timedwrlock, pthread_rwlock_timedwrlock);
*/
static int
rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr)
rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr __unused)
{
pthread_rwlock_t prwlock;
prwlock = (pthread_rwlock_t)calloc(1, sizeof(struct pthread_rwlock));
if (prwlock == NULL)
return (ENOMEM);
if (attr != NULL)
prwlock->kind = (*attr)->kind;
else
prwlock->kind = PTHREAD_RWLOCK_DEFAULT_NP;
if (prwlock->kind == PTHREAD_RWLOCK_PREFER_READER_NP)
prwlock->lock.rw_flags |= URWLOCK_PREFER_READER;
*rwlock = prwlock;
return (0);
}
@ -118,7 +112,7 @@ init_static(struct pthread *thread, pthread_rwlock_t *rwlock)
}
int
_pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr)
_pthread_rwlock_init (pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr)
{
*rwlock = NULL;
return (rwlock_init(rwlock, attr));
@ -266,14 +260,6 @@ rwlock_wrlock_common (pthread_rwlock_t *rwlock, const struct timespec *abstime)
CHECK_AND_INIT_RWLOCK
if (__predict_false(prwlock->owner == curthread)) {
if (__predict_false(
prwlock->kind == PTHREAD_RWLOCK_PREFER_WRITER_NP)) {
prwlock->recurse++;
return (0);
}
}
/*
* POSIX said the validity of the abstimeout parameter need
* not be checked if the lock can be immediately acquired.
@ -349,13 +335,6 @@ _pthread_rwlock_unlock (pthread_rwlock_t *rwlock)
if (state & URWLOCK_WRITE_OWNER) {
if (__predict_false(prwlock->owner != curthread))
return (EPERM);
if (__predict_false(
prwlock->kind == PTHREAD_RWLOCK_PREFER_WRITER_NP)) {
if (prwlock->recurse > 0) {
prwlock->recurse--;
return (0);
}
}
prwlock->owner = NULL;
}

View File

@ -36,10 +36,8 @@
__weak_reference(_pthread_rwlockattr_destroy, pthread_rwlockattr_destroy);
__weak_reference(_pthread_rwlockattr_getpshared, pthread_rwlockattr_getpshared);
__weak_reference(_pthread_rwlockattr_getkind_np, pthread_rwlockattr_getkind_np);
__weak_reference(_pthread_rwlockattr_init, pthread_rwlockattr_init);
__weak_reference(_pthread_rwlockattr_setpshared, pthread_rwlockattr_setpshared);
__weak_reference(_pthread_rwlockattr_setkind_np, pthread_rwlockattr_setkind_np);
int
_pthread_rwlockattr_destroy(pthread_rwlockattr_t *rwlockattr)
@ -83,7 +81,6 @@ _pthread_rwlockattr_init(pthread_rwlockattr_t *rwlockattr)
return(ENOMEM);
prwlockattr->pshared = PTHREAD_PROCESS_PRIVATE;
prwlockattr->kind = PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP;
*rwlockattr = prwlockattr;
return(0);
@ -100,22 +97,3 @@ _pthread_rwlockattr_setpshared(pthread_rwlockattr_t *rwlockattr, int pshared)
return(0);
}
int
_pthread_rwlockattr_setkind_np(pthread_rwlockattr_t *attr, int kind)
{
if (kind != PTHREAD_RWLOCK_PREFER_READER_NP &&
kind != PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP &&
kind != PTHREAD_RWLOCK_PREFER_WRITER_NP) {
return (EINVAL);
}
(*attr)->kind = kind;
return (0);
}
int
_pthread_rwlockattr_getkind_np(const pthread_rwlockattr_t *attr, int *kind)
{
*kind = (*attr)->kind;
return (0);
}
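With the non-portable kind interfaces removed, write locks are strictly non-recursive again and portable code should rely only on the POSIX surface. A minimal sketch:

#include <err.h>
#include <pthread.h>

static pthread_rwlock_t lk = PTHREAD_RWLOCK_INITIALIZER;

static void
update_shared_state(void)
{
	int error;

	if ((error = pthread_rwlock_wrlock(&lk)) != 0)
		errc(1, error, "pthread_rwlock_wrlock");
	/*
	 * Modify shared state here; do not take &lk again in this
	 * section: write locks are non-recursive once the *_np kind
	 * support is gone.
	 */
	pthread_rwlock_unlock(&lk);
}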

View File

@ -34,7 +34,6 @@ LDFLAGS+= -shared -Wl,-Bsymbolic
DPADD= ${LIBC_PIC}
LDADD= -lc_pic -lssp_nonshared
.if ${MACHINE_CPUARCH} != "ia64"
.if ${MK_SYMVER} == "yes"
LIBCDIR= ${.CURDIR}/../../lib/libc
VERSION_DEF= ${LIBCDIR}/Versions.def
@ -48,7 +47,6 @@ ${PROG}: ${VERSION_MAP}
SYMBOL_MAPS+= ${.CURDIR}/${RTLD_ARCH}/Symbol.map
.endif
.endif
.endif
.sinclude "${.CURDIR}/${RTLD_ARCH}/Makefile.inc"

View File

@ -195,9 +195,22 @@ reloc_non_plt_obj(Obj_Entry *obj_rtld, Obj_Entry *obj, const Elf_Rela *rela,
int sym_index;
def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj,
false, cache);
if (def == NULL)
return -1;
true, cache);
if (def == NULL) {
/*
* XXX r_debug_state is problematic and find_symdef()
* returns NULL for it. This probably has something to
* do with symbol versioning (r_debug_state is in the
* symbol map). If we return -1 in that case we abort
* relocating rtld, which typically is fatal. So, for
* now just skip the symbol when we're relocating
* rtld. We don't care about r_debug_state unless we
* are being debugged.
*/
if (obj != obj_rtld)
return -1;
break;
}
if (def->st_shndx != SHN_UNDEF) {
target = (Elf_Addr)(defobj->relocbase + def->st_value);

View File

@ -73,8 +73,8 @@ The
utility may be used on the root file system in single-user mode
but the system should be rebooted via ``reboot -n'' afterwards.
.Sh OPTIONS
.Bl -tag -width indent
The available options are as follows:
.Bl -tag -width indent
.It Fl b
Use the default block size (usually 512 bytes) instead
of the value gleaned from the disklabel.

View File

@ -24,7 +24,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd September 25, 2010
.Dd October 20, 2010
.Dt GELI 8
.Os
.Sh NAME
@ -119,6 +119,16 @@ utility:
.Ar file
.Ar prov
.Nm
.Cm suspend
.Op Fl v
.Fl a | Ar prov ...
.Nm
.Cm resume
.Op Fl pv
.Op Fl j Ar passfile
.Op Fl k Ar keyfile
.Ar prov
.Nm
.Cm resize
.Op Fl v
.Fl s Ar oldsize
@ -207,6 +217,8 @@ Allows to attach a provider with a random, one-time key - useful for swap
partitions and temporary file systems.
.It
Allows to verify data integrity (data authentication).
.It
Allows to suspend and resume encrypted devices.
.El
.Pp
The first argument to
@ -458,6 +470,8 @@ will not be detached even if all keys will be destroyed.
It can be even rescued with the
.Cm setkey
subcommand.
.Pp
Additional options include:
.Bl -tag -width ".Fl a Ar keyno"
.It Fl a
Destroy all keys (does not need
@ -482,6 +496,8 @@ backup, your data is gone for good.
In case the provider was attached with the
.Fl r
flag, the keys will not be destroyed, only the provider will be detached.
.Pp
Additional options include:
.Bl -tag -width ".Fl a"
.It Fl a
If specified, all currently attached providers will be killed.
@ -490,6 +506,8 @@ If specified, all currently attached providers will be killed.
Backup metadata from the given provider to the given file.
.It Cm restore
Restore metadata from the given file to the given provider.
.Pp
Additional options include:
.Bl -tag -width ".Fl f"
.It Fl f
Metadata contains the size of the provider to ensure that the correct
@ -508,12 +526,73 @@ through
and
.Cm restore .
.El
.It Cm suspend
Suspend the device by waiting for all in-flight requests to finish, clearing all
sensitive information (such as keys) from kernel memory, and blocking all
further I/O requests until the
.Cm resume
subcommand is executed.
This functionality is useful, for example, for laptops: when one wants to suspend a
laptop, one does not want to leave an encrypted device attached.
Instead of closing all files and directories opened from a file system placed
on an encrypted device, unmounting the file system and detaching the device,
the
.Cm suspend
subcommand can be used.
Any access to the encrypted device will be blocked until the keys are
recovered through the
.Cm resume
subcommand, so there is no need to close or unmount anything.
The
.Cm suspend
subcommand does not work with devices created with the
.Cm onetime
subcommand.
Please note that sensitive data might still be present in memory after
suspending an encrypted device, because of the file system cache, etc.
.Pp
Additional options include:
.Bl -tag -width ".Fl a"
.It Fl a
Suspend all
.Nm
devices.
.El
.It Cm resume
Resume a previously suspended device.
The caller must ensure that executing this subcommand will not try to access the
suspended device, as that would lead to a deadlock.
For example, suspending a device that contains the file system where the
.Nm
utility is stored is a bad idea.
.Pp
Additional options include:
.Bl -tag -width ".Fl j Ar passfile"
.It Fl j Ar passfile
Specifies a file which contains the passphrase or its part.
For more information see the description of the
.Fl J
option for the
.Cm init
subcommand.
.It Fl k Ar keyfile
Specifies a file which contains part of the key.
For more information see the description of the
.Fl K
option for the
.Cm init
subcommand.
.It Fl p
Do not use passphrase as the key component.
.El
.It Cm resize
Inform
.Nm
that the provider has been resized.
The old metadata block is relocated to the correct position at the end of the
provider and the provider size is updated.
.Pp
Additional options include:
.Bl -tag -width ".Fl s Ar oldsize"
.It Fl s Ar oldsize
The size of the provider before it was resized.
@ -746,6 +825,19 @@ prompt:
# geli attach da0
Enter passphrase: foobar
.Ed
.Pp
Suspend all
.Nm
devices, suspend a laptop, then resume devices one by one after resuming a
laptop:
.Bd -literal -offset indent
# geli suspend -a
# zzz
<resume your laptop>
# geli resume -p -k keyfile gpt/secret
# geli resume gpt/private
Enter passphrase:
.Ed
.Sh ENCRYPTION MODES
.Nm
supports two encryption modes:

View File

@ -67,6 +67,7 @@ static void eli_attach(struct gctl_req *req);
static void eli_configure(struct gctl_req *req);
static void eli_setkey(struct gctl_req *req);
static void eli_delkey(struct gctl_req *req);
static void eli_resume(struct gctl_req *req);
static void eli_kill(struct gctl_req *req);
static void eli_backup(struct gctl_req *req);
static void eli_restore(struct gctl_req *req);
@ -89,6 +90,8 @@ static int eli_backup_create(struct gctl_req *req, const char *prov,
* configure [-bB] prov ...
* setkey [-pPv] [-n keyno] [-j passfile] [-J newpassfile] [-k keyfile] [-K newkeyfile] prov
* delkey [-afv] [-n keyno] prov
* suspend [-v] -a | prov ...
* resume [-pv] [-j passfile] [-k keyfile] prov
* kill [-av] [prov ...]
* backup [-v] prov file
* restore [-fv] file prov
@ -198,6 +201,22 @@ struct g_command class_commands[] = {
},
"[-afv] [-n keyno] prov"
},
{ "suspend", G_FLAG_VERBOSE, NULL,
{
{ 'a', "all", NULL, G_TYPE_BOOL },
G_OPT_SENTINEL
},
"[-v] -a | prov ..."
},
{ "resume", G_FLAG_VERBOSE, eli_main,
{
{ 'j', "passfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI },
{ 'k', "keyfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI },
{ 'p', "nopassphrase", NULL, G_TYPE_BOOL },
G_OPT_SENTINEL
},
"[-pv] [-j passfile] [-k keyfile] prov"
},
{ "kill", G_FLAG_VERBOSE, eli_main,
{
{ 'a', "all", NULL, G_TYPE_BOOL },
@ -280,6 +299,8 @@ eli_main(struct gctl_req *req, unsigned int flags)
eli_setkey(req);
else if (strcmp(name, "delkey") == 0)
eli_delkey(req);
else if (strcmp(name, "resume") == 0)
eli_resume(req);
else if (strcmp(name, "kill") == 0)
eli_kill(req);
else if (strcmp(name, "backup") == 0)
@ -1118,6 +1139,44 @@ eli_delkey(struct gctl_req *req)
eli_delkey_detached(req, prov);
}
static void
eli_resume(struct gctl_req *req)
{
struct g_eli_metadata md;
unsigned char key[G_ELI_USERKEYLEN];
const char *prov;
off_t mediasize;
int nargs;
nargs = gctl_get_int(req, "nargs");
if (nargs != 1) {
gctl_error(req, "Invalid number of arguments.");
return;
}
prov = gctl_get_ascii(req, "arg0");
if (eli_metadata_read(req, prov, &md) == -1)
return;
mediasize = g_get_mediasize(prov);
if (md.md_provsize != (uint64_t)mediasize) {
gctl_error(req, "Provider size mismatch.");
return;
}
if (eli_genkey(req, &md, key, false) == NULL) {
bzero(key, sizeof(key));
return;
}
gctl_ro_param(req, "key", sizeof(key), key);
if (gctl_issue(req) == NULL) {
if (verbose)
printf("Resumed %s.\n", prov);
}
bzero(key, sizeof(key));
}
static int
eli_trash_metadata(struct gctl_req *req, const char *prov, int fd, off_t offset)
{

View File

@ -88,32 +88,19 @@ static void hook_free(struct hookproc *hp);
static void
descriptors(void)
{
long maxfd;
int fd;
/*
* Close all descriptors.
* Close all (or almost all) descriptors.
*/
maxfd = sysconf(_SC_OPEN_MAX);
if (maxfd < 0) {
pjdlog_errno(LOG_WARNING, "sysconf(_SC_OPEN_MAX) failed");
maxfd = 1024;
}
for (fd = 0; fd <= maxfd; fd++) {
switch (fd) {
case STDIN_FILENO:
case STDOUT_FILENO:
case STDERR_FILENO:
if (pjdlog_mode_get() == PJDLOG_MODE_STD)
break;
/* FALLTHROUGH */
default:
close(fd);
break;
}
}
if (pjdlog_mode_get() == PJDLOG_MODE_STD)
if (pjdlog_mode_get() == PJDLOG_MODE_STD) {
closefrom(MAX(MAX(STDIN_FILENO, STDOUT_FILENO),
STDERR_FILENO) + 1);
return;
}
closefrom(0);
/*
* Redirect stdin, stdout and stderr to /dev/null.
*/
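The rewrite replaces the sysconf(_SC_OPEN_MAX) loop with closefrom(2), which closes every descriptor at or above its argument in a single call. A minimal sketch of the same idea (helper name hypothetical):

#include <sys/param.h>	/* MAX() */
#include <unistd.h>

static void
close_descriptors(int keep_std)
{
	if (keep_std) {
		/* Keep stdin/stdout/stderr, close everything above them. */
		closefrom(MAX(MAX(STDIN_FILENO, STDOUT_FILENO),
		    STDERR_FILENO) + 1);
	} else
		closefrom(0);
}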

View File

@ -1009,11 +1009,11 @@ The second format
with multiple addresses) is provided for convenience only and
its use is discouraged.
.It Ar addr : Oo Cm not Oc Bro
.Bl -tag -width indent
.Cm any | me | me6 |
.Cm table Ns Pq Ar number Ns Op , Ns Ar value
.Ar | addr-list | addr-set
.Brc
.Bl -tag -width indent
.It Cm any
matches any IP address.
.It Cm me
@ -2176,7 +2176,6 @@ Finally, the following parameters can be configured for both
pipes and queues:
.Pp
.Bl -tag -width XXXX -compact
.Pp
.It Cm buckets Ar hash-table-size
Specifies the size of the hash table used for storing the
various queues.

View File

@ -412,7 +412,6 @@ Most checks are self-explanatory or can ``never happen''.
Common errors are given below.
.Pp
.Bl -tag -width Ds -compact
.Pp
.It <filename>: not found on tape
The specified file name was listed in the tape directory,
but was not found on the tape.

View File

@ -90,8 +90,8 @@ see
The
.Nm
driver creates the following:
.Bl -tag -width ".Pa /dev/iscsi%dxx" -compact
.Pp
.Bl -tag -width ".Pa /dev/iscsi%dxx" -compact
.It Pa /dev/iscsi
used to create new sessions.
.It Pa /dev/iscsi%d

View File

@ -161,7 +161,7 @@ hint.acpi.0.disabled="1"
.Sh SEE ALSO
.Xr kenv 1 ,
.Xr loader.conf 5 ,
.Xr loader 8,
.Xr loader 8 ,
.Xr resource_int_value 9 .
.Sh HISTORY
The

View File

@ -462,8 +462,8 @@ member in the total struct.
.It Dv p_type
This member of the Phdr struct tells what kind of segment this array
element describes or how to interpret the array element's information.
.Bl -tag -width "PT_DYNAMIC" -compact
.Pp
.Bl -tag -width "PT_DYNAMIC" -compact
.It Dv PT_NULL
The array element is unused and the other members' values are undefined.
This lets the program header have ignored entries.

View File

@ -324,7 +324,7 @@ MAN= accept_filter.9 \
vm_page_lookup.9 \
vm_page_protect.9 \
vm_page_rename.9 \
vm_page_sleep_busy.9 \
vm_page_sleep_if_busy.9 \
vm_page_wakeup.9 \
vm_page_wire.9 \
vm_page_zero_fill.9 \

View File

@ -41,9 +41,9 @@
.In sys/vnode.h
.In vm/vm.h
.Ft int
.Fn VOP_GETPAGES "struct vnode *vp" "vm_page_t *m" "int count" "int reqpage" "vm_ooffset_t offset"
.Fn VOP_GETPAGES "struct vnode *vp" "vm_page_t *ma" "int count" "int reqpage" "vm_ooffset_t offset"
.Ft int
.Fn VOP_PUTPAGES "struct vnode *vp" "vm_page_t *m" "int count" "int sync" "int *rtvals" "vm_ooffset_t offset"
.Fn VOP_PUTPAGES "struct vnode *vp" "vm_page_t *ma" "int count" "int sync" "int *rtvals" "vm_ooffset_t offset"
.Sh DESCRIPTION
The
.Fn VOP_GETPAGES
@ -66,11 +66,11 @@ The arguments are:
.Bl -tag -width reqpage
.It Fa vp
The file to access.
.It Fa m
Pointer to the first element of an array of contiguous pages representing a
.It Fa ma
Pointer to the first element of an array of pages representing a
contiguous region of the file to be read or written.
.It Fa count
The number of pages in the array.
The number of bytes that should be read into the pages of the array.
.It Fa sync
.Dv VM_PAGER_PUT_SYNC
if the write should be synchronous.
@ -123,22 +123,27 @@ The page was not handled by this request.
The
.Fn VOP_GETPAGES
method is expected to release any pages in
.Fa m
.Fa ma
that it does not successfully handle, by calling
.Xr vm_page_free 9 .
When it succeeds,
.Fn VOP_GETPAGES
must set the valid bits appropriately, clear the dirty bit
(using
.Xr vm_page_undirty 9 ) ,
either activate the page (if its wanted bit is set)
must set the valid bits appropriately.
.Fn VOP_GETPAGES
must keep
.Fa reqpage
busy.
It must unbusy all other successfully handled pages and put them
on appropriate page queue(s).
For example,
.Fn VOP_GETPAGES
may either activate a page (if its wanted bit is set)
or deactivate it (otherwise), and finally call
.Xr vm_page_wakeup 9
to arouse any threads currently waiting for the page to be faulted in,
for each page read.
to arouse any threads currently waiting for the page to be faulted in.
.Sh RETURN VALUES
If it successfully reads
.Fa m[reqpage] ,
.Fa ma[reqpage] ,
.Fn VOP_GETPAGES
returns
.Dv VM_PAGER_OK ;

View File

@ -411,8 +411,8 @@ and to delete them later in orderly fashion.
.Pp
There is a set of macros defined
that helps to create oids of given type.
.Bl -tag -width SYSCTL_ADD_STRINGXX
They are as follows:
.Bl -tag -width SYSCTL_ADD_STRINGXX
.It Fn SYSCTL_ADD_OID
creates a raw oid.
This macro is functionally equivalent to the

View File

@ -52,9 +52,9 @@ function lowers the busy count on the page by one, if the resulting busy
count is zero, a
.Xr wakeup 9
will be issued if the page has been marked
.Dv PG_WANTED .
.Dv VPO_WANTED .
A page is typically marked
.Dv PG_WANTED
.Dv VPO_WANTED
by a thread to register its interest in
the page either completing I/O or becoming available for general use.
.Sh AUTHORS

View File

@ -27,22 +27,22 @@
.\" $FreeBSD$
.\"
.Dd July 13, 2001
.Dt VM_PAGE_SLEEP_BUSY 9
.Dt VM_PAGE_SLEEP_IF_BUSY 9
.Os
.Sh NAME
.Nm vm_page_sleep_busy
.Nm vm_page_sleep_if_busy
.Nd "wait for a busy page to become unbusy"
.Sh SYNOPSIS
.In sys/param.h
.In vm/vm.h
.In vm/vm_page.h
.Ft int
.Fn vm_page_sleep_busy "vm_page_t m" "int also_m_busy" "const char *wmesg"
.Fn vm_page_sleep_if_busy "vm_page_t m" "int also_m_busy" "const char *wmesg"
.Sh DESCRIPTION
The
.Fn vm_page_sleep_busy
.Fn vm_page_sleep_if_busy
function waits until the
.Dv PG_BUSY
.Dv VPO_BUSY
flag is cleared.
If
.Fa also_m_busy
@ -51,7 +51,7 @@ is non-zero, it also waits for
to become zero.
.Sh RETURN VALUES
If
.Fn vm_page_sleep_busy
.Fn vm_page_sleep_if_busy
finds the page busy it returns
.Dv TRUE .
If not, it returns
@ -59,7 +59,7 @@ If not, it returns
Returning
.Dv TRUE
does not necessarily mean that
.Fn vm_page_sleep_busy
.Fn vm_page_sleep_if_busy
slept, but only that
.Fn splvm
was called.
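As a hedged illustration of the renamed interface, a caller typically retries until the function reports the page was not busy; the exact locking requirements are assumptions based on contemporary callers:

#include <sys/param.h>
#include <vm/vm.h>
#include <vm/vm_page.h>

/* Hypothetical helper: wait until the page is no longer busy. */
static void
wait_for_page(vm_page_t m)
{
	while (vm_page_sleep_if_busy(m, 1, "pgwait"))
		;	/* returned TRUE: page was busy and we may have slept */
}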

View File

@ -50,25 +50,25 @@ of a page.
.Pp
.Fn vm_page_busy
sets the
.Dv PG_BUSY
.Dv VPO_BUSY
flag in the page.
.Pp
.Fn vm_page_flash
checks to see if there is anybody waiting on the page
.Dv ( PG_WANTED
.Dv ( VPO_WANTED
will be set), and if so, clears the
.Dv PG_WANTED
.Dv VPO_WANTED
flag and notifies whoever is waiting via
.Fn wakeup .
.Pp
.Fn vm_page_wakeup
clears the
.Dv PG_BUSY
.Dv VPO_BUSY
flag on the page, and calls
.Fn vm_page_flash
in case somebody has been waiting for it.
.Sh SEE ALSO
.Xr vm_page_sleep_busy 9 ,
.Xr vm_page_sleep_if_busy 9 ,
.Xr wakeup 9
.Sh AUTHORS
This manual page was written by

View File

@ -49,7 +49,7 @@ static int curent, bootonce;
/*
* Buffer below 64kB passed on gptread(), which can hold at least
* one sector od data (512 bytes).
* one sector of data (512 bytes).
*/
static char *secbuf;
@ -62,7 +62,7 @@ gptupdate(const char *which, struct dsk *dskp, struct gpt_hdr *hdr,
/*
* We need to update the following for both primary and backup GPT:
* 1. Sector on disk that contains curent partition.
* 1. Sector on disk that contains current partition.
* 2. Partition table checksum.
* 3. Header checksum.
* 4. Header on disk.

View File

@ -348,7 +348,7 @@ load(void)
return;
p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE);
bootinfo.bi_symtab = VTOP(p);
memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms));
*(uint32_t*)p = hdr.ex.a_syms;
p += sizeof(hdr.ex.a_syms);
if (hdr.ex.a_syms) {
if (xfsread(ino, p, hdr.ex.a_syms))
@ -385,7 +385,7 @@ load(void)
if (xfsread(ino, &es, sizeof(es)))
return;
for (i = 0; i < 2; i++) {
memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size));
*(Elf32_Word *)p = es[i].sh_size;
p += sizeof(es[i].sh_size);
fs_off = es[i].sh_offset;
if (xfsread(ino, p, es[i].sh_size))

View File

@ -3008,19 +3008,20 @@ xen/gnttab.c optional xen | xenhvm
xen/features.c optional xen | xenhvm
xen/evtchn/evtchn.c optional xen
xen/evtchn/evtchn_dev.c optional xen | xenhvm
xen/reboot.c optional xen
xen/xenbus/xenbus_client.c optional xen | xenhvm
xen/xenbus/xenbus_comms.c optional xen | xenhvm
xen/xenbus/xenbus_dev.c optional xen | xenhvm
xen/xenbus/xenbus_if.m optional xen | xenhvm
xen/xenbus/xenbus_probe.c optional xen | xenhvm
#xen/xenbus/xenbus_probe_backend.c optional xen
xen/xenbus/xenbus_xs.c optional xen | xenhvm
xen/xenbus/xenbus.c optional xen | xenhvm
xen/xenbus/xenbusb_if.m optional xen | xenhvm
xen/xenbus/xenbusb.c optional xen | xenhvm
xen/xenbus/xenbusb_front.c optional xen | xenhvm
xen/xenbus/xenbusb_back.c optional xen | xenhvm
xen/xenstore/xenstore.c optional xen | xenhvm
xen/xenstore/xenstore_dev.c optional xen | xenhvm
dev/xen/balloon/balloon.c optional xen | xenhvm
dev/xen/blkfront/blkfront.c optional xen | xenhvm
dev/xen/blkback/blkback.c optional xen | xenhvm
dev/xen/console/console.c optional xen
dev/xen/console/xencons_ring.c optional xen
dev/xen/blkfront/blkfront.c optional xen | xenhvm
dev/xen/control/control.c optional xen | xenhvm
dev/xen/netfront/netfront.c optional xen | xenhvm
dev/xen/xenpci/xenpci.c optional xenpci
dev/xen/xenpci/evtchn.c optional xenpci
dev/xen/xenpci/machine_reboot.c optional xenpci

View File

@ -179,7 +179,7 @@ acpi_pci_set_powerstate_method(device_t dev, device_t child, int state)
*/
ACPI_SERIAL_BEGIN(pci_powerstate);
old_state = pci_get_powerstate(child);
if (old_state < state) {
if (old_state < state && pci_do_power_suspend) {
error = pci_set_powerstate_method(dev, child, state);
if (error)
goto out;

View File

@ -374,6 +374,7 @@ static void bge_tick(void *);
static void bge_stats_clear_regs(struct bge_softc *);
static void bge_stats_update(struct bge_softc *);
static void bge_stats_update_regs(struct bge_softc *);
static struct mbuf *bge_check_short_dma(struct mbuf *);
static struct mbuf *bge_setup_tso(struct bge_softc *, struct mbuf *,
uint16_t *);
static int bge_encap(struct bge_softc *, struct mbuf **, uint32_t *);
@ -1692,6 +1693,11 @@ bge_blockinit(struct bge_softc *sc)
bge_writembx(sc, BGE_MBX_RX_MINI_PROD_LO, 0);
}
/* Choose de-pipeline mode for BCM5906 A1. */
if (sc->bge_asicrev == BGE_ASICREV_BCM5906 &&
sc->bge_chiprev == BGE_CHIPID_BCM5906_A1)
CSR_WRITE_4(sc, BGE_ISO_PKT_TX,
(CSR_READ_4(sc, BGE_ISO_PKT_TX) & ~3) | 2);
/*
* The BD ring replenish thresholds control how often the
* hardware fetches new BD's from the producer rings in host
@ -2633,6 +2639,8 @@ bge_attach(device_t dev)
case BGE_ASICREV_BCM5752:
case BGE_ASICREV_BCM5906:
sc->bge_flags |= BGE_FLAG_575X_PLUS;
if (sc->bge_asicrev == BGE_ASICREV_BCM5906)
sc->bge_flags |= BGE_FLAG_SHORT_DMA_BUG;
/* FALLTHROUGH */
case BGE_ASICREV_BCM5705:
sc->bge_flags |= BGE_FLAG_5705_PLUS;
@ -4059,6 +4067,39 @@ bge_cksum_pad(struct mbuf *m)
return (0);
}
static struct mbuf *
bge_check_short_dma(struct mbuf *m)
{
struct mbuf *n;
int found;
/*
* If the device receives two back-to-back send BDs with less than
* or equal to 8 total bytes then the device may hang. The two
* back-to-back send BDs must be in the same frame for this failure
* to occur. Scan the mbuf chain and see whether two back-to-back
* send BDs are there. If this is the case, allocate a new mbuf
* and copy the frame to work around the silicon bug.
*/
for (n = m, found = 0; n != NULL; n = n->m_next) {
if (n->m_len < 8) {
found++;
if (found > 1)
break;
continue;
}
found = 0;
}
if (found > 1) {
n = m_defrag(m, M_DONTWAIT);
if (n == NULL)
m_freem(m);
} else
n = m;
return (n);
}
static struct mbuf *
bge_setup_tso(struct bge_softc *sc, struct mbuf *m, uint16_t *mss)
{
@ -4132,6 +4173,13 @@ bge_encap(struct bge_softc *sc, struct mbuf **m_head, uint32_t *txidx)
csum_flags = 0;
mss = 0;
vlan_tag = 0;
if ((sc->bge_flags & BGE_FLAG_SHORT_DMA_BUG) != 0 &&
m->m_next != NULL) {
*m_head = bge_check_short_dma(m);
if (*m_head == NULL)
return (ENOBUFS);
m = *m_head;
}
if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) {
*m_head = m = bge_setup_tso(sc, m, &mss);
if (*m_head == NULL)
@ -4366,6 +4414,7 @@ bge_init_locked(struct bge_softc *sc)
{
struct ifnet *ifp;
uint16_t *m;
uint32_t mode;
BGE_LOCK_ASSERT(sc);
@ -4471,8 +4520,12 @@ bge_init_locked(struct bge_softc *sc)
/* Init TX ring. */
bge_init_tx_ring(sc);
/* Enable TX MAC state machine lockup fix. */
mode = CSR_READ_4(sc, BGE_TX_MODE);
if (BGE_IS_5755_PLUS(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5906)
mode |= BGE_TXMODE_MBUF_LOCKUP_FIX;
/* Turn on transmitter. */
BGE_SETBIT(sc, BGE_TX_MODE, BGE_TXMODE_ENABLE);
CSR_WRITE_4(sc, BGE_TX_MODE, mode | BGE_TXMODE_ENABLE);
/* Turn on receiver. */
BGE_SETBIT(sc, BGE_RX_MODE, BGE_RXMODE_ENABLE);

View File

@ -765,6 +765,7 @@
#define BGE_TXMODE_FLOWCTL_ENABLE 0x00000010
#define BGE_TXMODE_BIGBACKOFF_ENABLE 0x00000020
#define BGE_TXMODE_LONGPAUSE_ENABLE 0x00000040
#define BGE_TXMODE_MBUF_LOCKUP_FIX 0x00000100
/* Transmit MAC status register */
#define BGE_TXSTAT_RX_XOFFED 0x00000001
@ -879,6 +880,7 @@
#define BGE_SDI_STATS_CTL 0x0C08
#define BGE_SDI_STATS_ENABLE_MASK 0x0C0C
#define BGE_SDI_STATS_INCREMENT_MASK 0x0C10
#define BGE_ISO_PKT_TX 0x0C20
#define BGE_LOCSTATS_COS0 0x0C80
#define BGE_LOCSTATS_COS1 0x0C84
#define BGE_LOCSTATS_COS2 0x0C88
@ -2727,6 +2729,7 @@ struct bge_softc {
#define BGE_FLAG_40BIT_BUG 0x01000000
#define BGE_FLAG_4G_BNDRY_BUG 0x02000000
#define BGE_FLAG_RX_ALIGNBUG 0x04000000
#define BGE_FLAG_SHORT_DMA_BUG 0x08000000
uint32_t bge_phy_flags;
#define BGE_PHY_WIRESPEED 0x00000001
#define BGE_PHY_ADC_BUG 0x00000002

View File

@ -1356,7 +1356,7 @@ iwi_checkforqos(struct ieee80211vap *vap,
wme = NULL;
while (frm < efrm) {
IEEE80211_VERIFY_LENGTH(efrm - frm, frm[1], return);
IEEE80211_VERIFY_LENGTH(efrm - frm, frm[1], break);
switch (*frm) {
case IEEE80211_ELEMID_VENDOR:
if (iswmeoui(frm))
@ -1368,7 +1368,7 @@ iwi_checkforqos(struct ieee80211vap *vap,
ni = vap->iv_bss;
ni->ni_capinfo = capinfo;
ni->ni_associd = associd;
ni->ni_associd = associd & 0x3fff;
if (wme != NULL)
ni->ni_flags |= IEEE80211_NODE_QOS;
else

View File

@ -975,7 +975,9 @@ enum mfi_pd_state {
MFI_PD_STATE_OFFLINE = 0x10,
MFI_PD_STATE_FAILED = 0x11,
MFI_PD_STATE_REBUILD = 0x14,
MFI_PD_STATE_ONLINE = 0x18
MFI_PD_STATE_ONLINE = 0x18,
MFI_PD_STATE_COPYBACK = 0x20,
MFI_PD_STATE_SYSTEM = 0x40
};
union mfi_ld_ref {

View File

@ -57,7 +57,8 @@ static int mvs_ch_deinit(device_t dev);
static int mvs_ch_suspend(device_t dev);
static int mvs_ch_resume(device_t dev);
static void mvs_dmainit(device_t dev);
static void mvs_dmasetupc_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int error);
static void mvs_dmasetupc_cb(void *xsc,
bus_dma_segment_t *segs, int nsegs, int error);
static void mvs_dmafini(device_t dev);
static void mvs_slotsalloc(device_t dev);
static void mvs_slotsfree(device_t dev);
@ -79,7 +80,8 @@ static void mvs_crbq_intr(device_t dev);
static void mvs_begin_transaction(device_t dev, union ccb *ccb);
static void mvs_legacy_execute_transaction(struct mvs_slot *slot);
static void mvs_timeout(struct mvs_slot *slot);
static void mvs_dmasetprd(void *arg, bus_dma_segment_t *segs, int nsegs, int error);
static void mvs_dmasetprd(void *arg,
bus_dma_segment_t *segs, int nsegs, int error);
static void mvs_requeue_frozen(device_t dev);
static void mvs_execute_transaction(struct mvs_slot *slot);
static void mvs_end_transaction(struct mvs_slot *slot, enum mvs_err_type et);
@ -314,9 +316,11 @@ mvs_dmainit(device_t dev)
if (bus_dmamem_alloc(ch->dma.workrq_tag, (void **)&ch->dma.workrq, 0,
&ch->dma.workrq_map))
goto error;
if (bus_dmamap_load(ch->dma.workrq_tag, ch->dma.workrq_map, ch->dma.workrq,
MVS_WORKRQ_SIZE, mvs_dmasetupc_cb, &dcba, 0) || dcba.error) {
bus_dmamem_free(ch->dma.workrq_tag, ch->dma.workrq, ch->dma.workrq_map);
if (bus_dmamap_load(ch->dma.workrq_tag, ch->dma.workrq_map,
ch->dma.workrq, MVS_WORKRQ_SIZE, mvs_dmasetupc_cb, &dcba, 0) ||
dcba.error) {
bus_dmamem_free(ch->dma.workrq_tag,
ch->dma.workrq, ch->dma.workrq_map);
goto error;
}
ch->dma.workrq_bus = dcba.maddr;
@ -329,9 +333,11 @@ mvs_dmainit(device_t dev)
if (bus_dmamem_alloc(ch->dma.workrp_tag, (void **)&ch->dma.workrp, 0,
&ch->dma.workrp_map))
goto error;
if (bus_dmamap_load(ch->dma.workrp_tag, ch->dma.workrp_map, ch->dma.workrp,
MVS_WORKRP_SIZE, mvs_dmasetupc_cb, &dcba, 0) || dcba.error) {
bus_dmamem_free(ch->dma.workrp_tag, ch->dma.workrp, ch->dma.workrp_map);
if (bus_dmamap_load(ch->dma.workrp_tag, ch->dma.workrp_map,
ch->dma.workrp, MVS_WORKRP_SIZE, mvs_dmasetupc_cb, &dcba, 0) ||
dcba.error) {
bus_dmamem_free(ch->dma.workrp_tag,
ch->dma.workrp, ch->dma.workrp_map);
goto error;
}
ch->dma.workrp_bus = dcba.maddr;
@ -371,7 +377,8 @@ mvs_dmafini(device_t dev)
}
if (ch->dma.workrp_bus) {
bus_dmamap_unload(ch->dma.workrp_tag, ch->dma.workrp_map);
bus_dmamem_free(ch->dma.workrp_tag, ch->dma.workrp, ch->dma.workrp_map);
bus_dmamem_free(ch->dma.workrp_tag,
ch->dma.workrp, ch->dma.workrp_map);
ch->dma.workrp_bus = 0;
ch->dma.workrp_map = NULL;
ch->dma.workrp = NULL;
@ -382,7 +389,8 @@ mvs_dmafini(device_t dev)
}
if (ch->dma.workrq_bus) {
bus_dmamap_unload(ch->dma.workrq_tag, ch->dma.workrq_map);
bus_dmamem_free(ch->dma.workrq_tag, ch->dma.workrq, ch->dma.workrq_map);
bus_dmamem_free(ch->dma.workrq_tag,
ch->dma.workrq, ch->dma.workrq_map);
ch->dma.workrq_bus = 0;
ch->dma.workrq_map = NULL;
ch->dma.workrq = NULL;
@ -444,14 +452,16 @@ mvs_setup_edma_queues(device_t dev)
ATA_OUTL(ch->r_mem, EDMA_REQQBAH, work >> 32);
ATA_OUTL(ch->r_mem, EDMA_REQQIP, work & 0xffffffff);
ATA_OUTL(ch->r_mem, EDMA_REQQOP, work & 0xffffffff);
bus_dmamap_sync(ch->dma.workrq_tag, ch->dma.workrq_map, BUS_DMASYNC_PREWRITE);
bus_dmamap_sync(ch->dma.workrq_tag, ch->dma.workrq_map,
BUS_DMASYNC_PREWRITE);
/* Response queue. */
bzero(ch->dma.workrp, 256);
memset(ch->dma.workrp, 0xff, MVS_WORKRP_SIZE);
work = ch->dma.workrp_bus;
ATA_OUTL(ch->r_mem, EDMA_RESQBAH, work >> 32);
ATA_OUTL(ch->r_mem, EDMA_RESQIP, work & 0xffffffff);
ATA_OUTL(ch->r_mem, EDMA_RESQOP, work & 0xffffffff);
bus_dmamap_sync(ch->dma.workrp_tag, ch->dma.workrp_map, BUS_DMASYNC_PREREAD);
bus_dmamap_sync(ch->dma.workrp_tag, ch->dma.workrp_map,
BUS_DMASYNC_PREREAD);
ch->out_idx = 0;
ch->in_idx = 0;
}
@ -678,20 +688,15 @@ mvs_ch_intr(void *data)
int i, ccs, port = -1, selfdis = 0;
int edma = (ch->numtslots != 0 || ch->numdslots != 0);
//device_printf(dev, "irq cause %02x EDMA %d IEC %08x\n",
// arg->cause, edma, ATA_INL(ch->r_mem, EDMA_IEC));
/* New item in response queue. */
if ((arg->cause & 2) && edma)
mvs_crbq_intr(dev);
/* Some error or special event. */
if (arg->cause & 1) {
iec = ATA_INL(ch->r_mem, EDMA_IEC);
//device_printf(dev, "irq cause %02x EDMA %d IEC %08x\n",
// arg->cause, edma, iec);
if (iec & EDMA_IE_SERRINT) {
serr = ATA_INL(ch->r_mem, SATA_SE);
ATA_OUTL(ch->r_mem, SATA_SE, serr);
//device_printf(dev, "SERR %08x\n", serr);
}
/* EDMA self-disabled due to error. */
if (iec & EDMA_IE_ESELFDIS)
@ -706,7 +711,6 @@ mvs_ch_intr(void *data)
fisic = SATA_FISC_FISWAIT4HOSTRDYEN_B1;
else /* For Gen-IIe - read FIS interrupt cause. */
fisic = ATA_INL(ch->r_mem, SATA_FISIC);
//device_printf(dev, "FISIC %08x\n", fisic);
}
if (selfdis)
ch->curr_mode = MVS_EDMA_UNKNOWN;
@ -745,7 +749,6 @@ mvs_ch_intr(void *data)
}
}
}
//device_printf(dev, "err slot %d port %d\n", ccs, port);
mvs_requeue_frozen(dev);
for (i = 0; i < MVS_MAX_SLOTS; i++) {
/* XXX: requests in loading state. */
@ -771,7 +774,8 @@ mvs_ch_intr(void *data)
ch->fatalerr = 1;
}
} else if (iec & 0xfc1e9000) {
if (ch->numtslots == 0 && i != ccs && port != -2)
if (ch->numtslots == 0 &&
i != ccs && port != -2)
et = MVS_ERR_INNOCENT;
else
et = MVS_ERR_SATA;
@ -823,8 +827,6 @@ mvs_legacy_intr(device_t dev)
/* Clear interrupt and get status. */
status = mvs_getstatus(dev, 1);
// device_printf(dev, "Legacy intr status %02x\n",
// status);
if (slot->state < MVS_SLOT_RUNNING)
return;
port = ccb->ccb_h.target_id & 0x0f;
@ -867,7 +869,8 @@ mvs_legacy_intr(device_t dev)
/* If data write command - put them */
if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_OUT) {
if (mvs_wait(dev, ATA_S_DRQ, ATA_S_BUSY, 1000) < 0) {
device_printf(dev, "timeout waiting for write DRQ\n");
device_printf(dev,
"timeout waiting for write DRQ\n");
et = MVS_ERR_TIMEOUT;
goto end_finished;
}
@ -890,19 +893,18 @@ mvs_legacy_intr(device_t dev)
ATA_OUTL(ch->r_mem, DMA_C, 0);
goto end_finished;
} else { /* ATAPI PIO */
length = ATA_INB(ch->r_mem,ATA_CYL_LSB) | (ATA_INB(ch->r_mem,ATA_CYL_MSB) << 8);
length = ATA_INB(ch->r_mem,ATA_CYL_LSB) |
(ATA_INB(ch->r_mem,ATA_CYL_MSB) << 8);
ireason = ATA_INB(ch->r_mem,ATA_IREASON);
//device_printf(dev, "status %02x, ireason %02x, length %d\n", status, ireason, length);
switch ((ireason & (ATA_I_CMD | ATA_I_IN)) |
(status & ATA_S_DRQ)) {
case ATAPI_P_CMDOUT:
device_printf(dev, "ATAPI CMDOUT\n");
device_printf(dev, "ATAPI CMDOUT\n");
/* Return wait for interrupt */
return;
case ATAPI_P_WRITE:
//device_printf(dev, "ATAPI WRITE\n");
if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
device_printf(dev, "trying to write on read buffer\n");
et = MVS_ERR_TFE;
@ -920,7 +922,6 @@ device_printf(dev, "ATAPI CMDOUT\n");
return;
case ATAPI_P_READ:
//device_printf(dev, "ATAPI READ\n");
if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_OUT) {
device_printf(dev, "trying to read on write buffer\n");
et = MVS_ERR_TFE;
@ -937,7 +938,6 @@ device_printf(dev, "ATAPI CMDOUT\n");
return;
case ATAPI_P_DONEDRQ:
device_printf(dev, "ATAPI DONEDRQ\n");
device_printf(dev,
"WARNING - DONEDRQ non conformant device\n");
if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
@ -958,13 +958,13 @@ device_printf(dev, "ATAPI DONEDRQ\n");
case ATAPI_P_ABORT:
case ATAPI_P_DONE:
//device_printf(dev, "ATAPI ABORT/DONE\n");
if (status & (ATA_S_ERROR | ATA_S_DWF))
et = MVS_ERR_TFE;
goto end_finished;
default:
device_printf(dev, "unknown transfer phase (status %02x, ireason %02x)\n",
device_printf(dev, "unknown transfer phase"
" (status %02x, ireason %02x)\n",
status, ireason);
et = MVS_ERR_TFE;
}
@ -980,38 +980,54 @@ mvs_crbq_intr(device_t dev)
struct mvs_channel *ch = device_get_softc(dev);
struct mvs_crpb *crpb;
union ccb *ccb;
int in_idx, cin_idx, slot;
int in_idx, fin_idx, cin_idx, slot;
uint32_t val;
uint16_t flags;
in_idx = (ATA_INL(ch->r_mem, EDMA_RESQIP) & EDMA_RESQP_ERPQP_MASK) >>
val = ATA_INL(ch->r_mem, EDMA_RESQIP);
if (val == 0)
val = ATA_INL(ch->r_mem, EDMA_RESQIP);
in_idx = (val & EDMA_RESQP_ERPQP_MASK) >>
EDMA_RESQP_ERPQP_SHIFT;
bus_dmamap_sync(ch->dma.workrp_tag, ch->dma.workrp_map,
BUS_DMASYNC_POSTREAD);
cin_idx = ch->in_idx;
fin_idx = cin_idx = ch->in_idx;
ch->in_idx = in_idx;
while (in_idx != cin_idx) {
crpb = (struct mvs_crpb *)
(ch->dma.workrp + MVS_CRPB_OFFSET + (MVS_CRPB_SIZE * cin_idx));
(ch->dma.workrp + MVS_CRPB_OFFSET +
(MVS_CRPB_SIZE * cin_idx));
slot = le16toh(crpb->id) & MVS_CRPB_TAG_MASK;
flags = le16toh(crpb->rspflg);
//device_printf(dev, "CRPB %d %d %04x\n", cin_idx, slot, flags);
/*
* Handle only successful completions here.
* Errors will be handled by the main interrupt handler.
*/
if (ch->numtslots != 0 || (flags & EDMA_IE_EDEVERR) == 0) {
if ((flags >> 8) & ATA_S_ERROR)
device_printf(dev, "ERROR STATUS CRPB %d %d %04x\n", cin_idx, slot, flags);
if (crpb->id == 0xffff && crpb->rspflg == 0xffff) {
device_printf(dev, "Unfilled CRPB "
"%d (%d->%d) tag %d flags %04x rs %08x\n",
cin_idx, fin_idx, in_idx, slot, flags, ch->rslots);
} else if (ch->numtslots != 0 ||
(flags & EDMA_IE_EDEVERR) == 0) {
crpb->id = 0xffff;
crpb->rspflg = 0xffff;
if (ch->slot[slot].state >= MVS_SLOT_RUNNING) {
ccb = ch->slot[slot].ccb;
ccb->ataio.res.status = (flags & MVS_CRPB_ATASTS_MASK) >>
ccb->ataio.res.status =
(flags & MVS_CRPB_ATASTS_MASK) >>
MVS_CRPB_ATASTS_SHIFT;
mvs_end_transaction(&ch->slot[slot], MVS_ERR_NONE);
} else
device_printf(dev, "EMPTY CRPB %d (->%d) %d %04x\n", cin_idx, in_idx, slot, flags);
} else
device_printf(dev, "ERROR FLAGS CRPB %d %d %04x\n", cin_idx, slot, flags);
} else {
device_printf(dev, "Unused tag in CRPB "
"%d (%d->%d) tag %d flags %04x rs %08x\n",
cin_idx, fin_idx, in_idx, slot, flags,
ch->rslots);
}
} else {
device_printf(dev,
"CRPB with error %d tag %d flags %04x\n",
cin_idx, slot, flags);
}
cin_idx = (cin_idx + 1) & (MVS_MAX_SLOTS - 1);
}
bus_dmamap_sync(ch->dma.workrp_tag, ch->dma.workrp_map,
@ -1266,8 +1282,6 @@ mvs_legacy_execute_transaction(struct mvs_slot *slot)
ch->rslots |= (1 << slot->slot);
ATA_OUTB(ch->r_mem, SATA_SATAICTL, port << SATA_SATAICTL_PMPTX_SHIFT);
if (ccb->ccb_h.func_code == XPT_ATA_IO) {
// device_printf(dev, "%d Legacy command %02x size %d\n",
// port, ccb->ataio.cmd.command, ccb->ataio.dxfer_len);
mvs_tfd_write(dev, ccb);
/* Device reset doesn't interrupt. */
if (ccb->ataio.cmd.command == ATA_DEVICE_RESET) {
@ -1287,7 +1301,8 @@ mvs_legacy_execute_transaction(struct mvs_slot *slot)
/* If data write command - output the data */
if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_OUT) {
if (mvs_wait(dev, ATA_S_DRQ, ATA_S_BUSY, 1000) < 0) {
device_printf(dev, "timeout waiting for write DRQ\n");
device_printf(dev,
"timeout waiting for write DRQ\n");
mvs_end_transaction(slot, MVS_ERR_TIMEOUT);
return;
}
@ -1296,9 +1311,6 @@ mvs_legacy_execute_transaction(struct mvs_slot *slot)
ch->transfersize / 2);
}
} else {
// device_printf(dev, "%d ATAPI command %02x size %d dma %d\n",
// port, ccb->csio.cdb_io.cdb_bytes[0], ccb->csio.dxfer_len,
// ch->basic_dma);
ch->donecount = 0;
ch->transfersize = min(ccb->csio.dxfer_len,
ch->curr[port].bytecount);
@ -1331,7 +1343,8 @@ mvs_legacy_execute_transaction(struct mvs_slot *slot)
DELAY(20);
}
if (timeout <= 0) {
device_printf(dev, "timeout waiting for ATAPI command ready\n");
device_printf(dev,
"timeout waiting for ATAPI command ready\n");
mvs_end_transaction(slot, MVS_ERR_TIMEOUT);
return;
}
@ -1371,8 +1384,6 @@ mvs_execute_transaction(struct mvs_slot *slot)
int port = ccb->ccb_h.target_id & 0x0f;
int i;
// device_printf(dev, "%d EDMA command %02x size %d slot %d tag %d\n",
// port, ccb->ataio.cmd.command, ccb->ataio.dxfer_len, slot->slot, slot->tag);
/* Get address of the prepared EPRD */
eprd = ch->dma.workrq_bus + MVS_EPRD_OFFSET + (MVS_EPRD_SIZE * slot->slot);
/* Prepare CRQB. Gen IIe uses different CRQB format. */
@ -1554,7 +1565,6 @@ mvs_end_transaction(struct mvs_slot *slot, enum mvs_err_type et)
union ccb *ccb = slot->ccb;
int lastto;
//device_printf(dev, "cmd done status %d\n", et);
bus_dmamap_sync(ch->dma.workrq_tag, ch->dma.workrq_map,
BUS_DMASYNC_POSTWRITE);
/* Read result registers to the result struct
@ -1792,7 +1802,8 @@ mvs_process_read_log(device_t dev, union ccb *ccb)
if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP)
device_printf(dev, "Error while READ LOG EXT\n");
else if ((data[0] & 0x80) == 0) {
device_printf(dev, "Non-queued command error in READ LOG EXT\n");
device_printf(dev,
"Non-queued command error in READ LOG EXT\n");
}
for (i = 0; i < MVS_MAX_SLOTS; i++) {
if (!ch->hold[i])

View File

@ -339,7 +339,6 @@ mvs_intr(void *data)
u_int32_t ic, aic;
ic = ATA_INL(ctlr->r_mem, CHIP_MIC);
//device_printf(ctlr->dev, "irq MIC:%08x\n", ic);
if (ctlr->msi) {
/* We have to mask MSI during processing. */
mtx_lock(&ctlr->mtx);

View File

@ -295,7 +295,6 @@ mvs_intr(void *data)
u_int32_t ic, aic;
ic = ATA_INL(ctlr->r_mem, CHIP_SOC_MIC);
//device_printf(ctlr->dev, "irq MIC:%08x\n", ic);
if ((ic & IC_HC0) == 0)
return;
/* Acknowledge interrupts of this HC. */

View File

@ -182,6 +182,7 @@ struct pci_quirk {
int type;
#define PCI_QUIRK_MAP_REG 1 /* PCI map register in weird place */
#define PCI_QUIRK_DISABLE_MSI 2 /* MSI/MSI-X doesn't work */
#define PCI_QUIRK_ENABLE_MSI_VM 3 /* Older chipset in VM where MSI works */
int arg1;
int arg2;
};
@ -218,6 +219,12 @@ struct pci_quirk pci_quirks[] = {
*/
{ 0x74501022, PCI_QUIRK_DISABLE_MSI, 0, 0 },
/*
* Some virtualization environments emulate an older chipset
* but support MSI just fine. QEMU uses the Intel 82440.
*/
{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM, 0, 0 },
{ 0 }
};
@ -257,6 +264,12 @@ SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
&pci_do_power_resume, 1,
"Transition from D3 -> D0 on resume.");
int pci_do_power_suspend = 1;
TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
&pci_do_power_suspend, 1,
"Transition from D0 -> D3 on suspend.");
static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
@ -594,7 +607,7 @@ pci_read_extcap(device_t pcib, pcicfgregs *cfg)
if (cfg->pp.pp_cap == 0) {
cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
if ((nextptr - ptr) > PCIR_POWER_DATA)
cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
}
@ -1827,6 +1840,23 @@ pci_msi_device_blacklisted(device_t dev)
return (0);
}
/*
* Returns true if a specified chipset supports MSI when it is
* emulated hardware in a virtual machine.
*/
static int
pci_msi_vm_chipset(device_t dev)
{
struct pci_quirk *q;
for (q = &pci_quirks[0]; q->devid; q++) {
if (q->devid == pci_get_devid(dev) &&
q->type == PCI_QUIRK_ENABLE_MSI_VM)
return (1);
}
return (0);
}
/*
* Determine if MSI is blacklisted globally on this system. Currently,
* we just check for blacklisted chipsets as represented by the
@ -1843,8 +1873,14 @@ pci_msi_blacklisted(void)
return (0);
/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
if (!(pcie_chipset || pcix_chipset))
if (!(pcie_chipset || pcix_chipset)) {
if (vm_guest != VM_GUEST_NO) {
dev = pci_find_bsf(0, 0, 0);
if (dev != NULL)
return (pci_msi_vm_chipset(dev) == 0);
}
return (1);
}
dev = pci_find_bsf(0, 0, 0);
if (dev != NULL)
@ -2954,7 +2990,9 @@ pci_suspend(device_t dev)
free(devlist, M_TEMP);
return (error);
}
pci_set_power_children(dev, devlist, numdevs, PCI_POWERSTATE_D3);
if (pci_do_power_suspend)
pci_set_power_children(dev, devlist, numdevs,
PCI_POWERSTATE_D3);
free(devlist, M_TEMP);
return (0);
}
@ -3656,9 +3694,15 @@ pci_reserve_map(device_t dev, device_t child, int type, int *rid,
res = NULL;
pci_read_bar(child, *rid, &map, &testval);
/* Ignore a BAR with a base of 0. */
if ((*rid == PCIR_BIOS && pci_rombase(testval) == 0) ||
pci_mapbase(testval) == 0)
/*
* Determine the size of the BAR and ignore BARs with a size
* of 0. Device ROM BARs use a different mask value.
*/
if (*rid == PCIR_BIOS)
mapsize = pci_romsize(testval);
else
mapsize = pci_mapsize(testval);
if (mapsize == 0)
goto out;
if (PCI_BAR_MEM(testval) || *rid == PCIR_BIOS) {
@ -3687,13 +3731,7 @@ pci_reserve_map(device_t dev, device_t child, int type, int *rid,
* actually uses and we would otherwise have a
* situation where we might allocate the excess to
* another driver, which won't work.
*
* Device ROM BARs use a different mask value.
*/
if (*rid == PCIR_BIOS)
mapsize = pci_romsize(testval);
else
mapsize = pci_mapsize(testval);
count = 1UL << mapsize;
if (RF_ALIGNMENT(flags) < mapsize)
flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);

View File

@ -447,7 +447,7 @@ pcib_suspend(device_t dev)
pcib_cfg_save(device_get_softc(dev));
error = bus_generic_suspend(dev);
if (error == 0 && pci_do_power_resume) {
if (error == 0 && pci_do_power_suspend) {
dstate = PCI_POWERSTATE_D3;
pcib = device_get_parent(device_get_parent(dev));
if (PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)

View File

@ -39,6 +39,7 @@
DECLARE_CLASS(pci_driver);
extern int pci_do_power_resume;
extern int pci_do_power_suspend;
void pci_add_children(device_t dev, int domain, int busno,
size_t dinfo_size);

View File

@ -427,12 +427,16 @@
#define PCIR_POWER_CAP 0x2
#define PCIM_PCAP_SPEC 0x0007
#define PCIM_PCAP_PMEREQCLK 0x0008
#define PCIM_PCAP_PMEREQPWR 0x0010
#define PCIM_PCAP_DEVSPECINIT 0x0020
#define PCIM_PCAP_DYNCLOCK 0x0040
#define PCIM_PCAP_SECCLOCK 0x00c0
#define PCIM_PCAP_CLOCKMASK 0x00c0
#define PCIM_PCAP_REQFULLCLOCK 0x0100
#define PCIM_PCAP_AUXPWR_0 0x0000
#define PCIM_PCAP_AUXPWR_55 0x0040
#define PCIM_PCAP_AUXPWR_100 0x0080
#define PCIM_PCAP_AUXPWR_160 0x00c0
#define PCIM_PCAP_AUXPWR_220 0x0100
#define PCIM_PCAP_AUXPWR_270 0x0140
#define PCIM_PCAP_AUXPWR_320 0x0180
#define PCIM_PCAP_AUXPWR_375 0x01c0
#define PCIM_PCAP_AUXPWRMASK 0x01c0
#define PCIM_PCAP_D1SUPP 0x0200
#define PCIM_PCAP_D2SUPP 0x0400
#define PCIM_PCAP_D0PME 0x0800
@ -447,16 +451,17 @@
#define PCIM_PSTAT_D2 0x0002
#define PCIM_PSTAT_D3 0x0003
#define PCIM_PSTAT_DMASK 0x0003
#define PCIM_PSTAT_REPENABLE 0x0010
#define PCIM_PSTAT_NOSOFTRESET 0x0008
#define PCIM_PSTAT_PMEENABLE 0x0100
#define PCIM_PSTAT_D0POWER 0x0000
#define PCIM_PSTAT_D1POWER 0x0200
#define PCIM_PSTAT_D2POWER 0x0400
#define PCIM_PSTAT_D3POWER 0x0600
#define PCIM_PSTAT_D0HEAT 0x0800
#define PCIM_PSTAT_D1HEAT 0x1000
#define PCIM_PSTAT_D2HEAT 0x1200
#define PCIM_PSTAT_D3HEAT 0x1400
#define PCIM_PSTAT_D1HEAT 0x0a00
#define PCIM_PSTAT_D2HEAT 0x0c00
#define PCIM_PSTAT_D3HEAT 0x0e00
#define PCIM_PSTAT_DATASELMASK 0x1e00
#define PCIM_PSTAT_DATAUNKN 0x0000
#define PCIM_PSTAT_DATADIV10 0x2000
#define PCIM_PSTAT_DATADIV100 0x4000
@ -464,11 +469,10 @@
#define PCIM_PSTAT_DATADIVMASK 0x6000
#define PCIM_PSTAT_PME 0x8000
#define PCIR_POWER_PMCSR 0x6
#define PCIM_PMCSR_DCLOCK 0x10
#define PCIM_PMCSR_B2SUPP 0x20
#define PCIM_BMCSR_B3SUPP 0x40
#define PCIM_BMCSR_BPCE 0x80
#define PCIR_POWER_BSE 0x6
#define PCIM_PMCSR_BSE_D3B3 0x00
#define PCIM_PMCSR_BSE_D3B2 0x40
#define PCIM_PMCSR_BSE_BPCCE 0x80
#define PCIR_POWER_DATA 0x7

View File

@ -42,9 +42,9 @@ typedef uint64_t pci_addr_t;
/* Interesting values for PCI power management */
struct pcicfg_pp {
uint16_t pp_cap; /* PCI power management capabilities */
uint8_t pp_status; /* config space address of PCI power status reg */
uint8_t pp_pmcsr; /* config space address of PMCSR reg */
uint8_t pp_data; /* config space address of PCI power data reg */
uint8_t pp_status; /* conf. space addr. of PM control/status reg */
uint8_t pp_bse; /* conf. space addr. of PM BSE reg */
uint8_t pp_data; /* conf. space addr. of PM data reg */
};
struct vpd_readonly {

View File

@ -1795,12 +1795,15 @@ sis_intr(void *arg)
if ((status & SIS_INTRS) == 0) {
/* Not ours. */
SIS_UNLOCK(sc);
return;
}
/* Disable interrupts. */
CSR_WRITE_4(sc, SIS_IER, 0);
for (;(status & SIS_INTRS) != 0;) {
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
break;
if (status &
(SIS_ISR_TX_DESC_OK | SIS_ISR_TX_ERR |
SIS_ISR_TX_OK | SIS_ISR_TX_IDLE) )
@ -1825,11 +1828,13 @@ sis_intr(void *arg)
status = CSR_READ_4(sc, SIS_ISR);
}
/* Re-enable interrupts. */
CSR_WRITE_4(sc, SIS_IER, 1);
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
/* Re-enable interrupts. */
CSR_WRITE_4(sc, SIS_IER, 1);
if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
sis_startl(ifp);
if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
sis_startl(ifp);
}
SIS_UNLOCK(sc);
}

View File

@ -44,7 +44,7 @@ __FBSDID("$FreeBSD$");
#include <machine/xen/xenfunc.h>
#include <machine/xen/xenvar.h>
#include <xen/hypervisor.h>
#include <xen/xenbus/xenbusvar.h>
#include <xen/xenstore/xenstorevar.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
@ -406,20 +406,20 @@ set_new_target(unsigned long target)
wakeup(balloon_process);
}
static struct xenbus_watch target_watch =
static struct xs_watch target_watch =
{
.node = "memory/target"
};
/* React to a change in the target key */
static void
watch_target(struct xenbus_watch *watch,
watch_target(struct xs_watch *watch,
const char **vec, unsigned int len)
{
unsigned long long new_target;
int err;
err = xenbus_scanf(XBT_NIL, "memory", "target", NULL,
err = xs_scanf(XST_NIL, "memory", "target", NULL,
"%llu", &new_target);
if (err) {
/* This is ok (for domain0 at least) - so just return */
@ -438,7 +438,7 @@ balloon_init_watcher(void *arg)
{
int err;
err = register_xenbus_watch(&target_watch);
err = xs_register_watch(&target_watch);
if (err)
printf("Failed to set balloon watcher\n");

File diff suppressed because it is too large

View File

@ -49,8 +49,10 @@ __FBSDID("$FreeBSD$");
#include <machine/vmparam.h>
#include <sys/bus_dma.h>
#include <machine/_inttypes.h>
#include <machine/xen/xen-os.h>
#include <machine/xen/xenfunc.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
@ -68,17 +70,21 @@ __FBSDID("$FreeBSD$");
/* prototypes */
static void xb_free_command(struct xb_command *cm);
static void xb_startio(struct xb_softc *sc);
static void connect(struct xb_softc *);
static void blkfront_connect(struct xb_softc *);
static void blkfront_closing(device_t);
static int blkfront_detach(device_t);
static int talk_to_backend(struct xb_softc *);
static int setup_blkring(struct xb_softc *);
static void blkif_int(void *);
static void blkfront_initialize(struct xb_softc *);
#if 0
static void blkif_recover(struct xb_softc *);
static void blkif_completion(struct xb_command *);
#endif
static int blkif_completion(struct xb_command *);
static void blkif_free(struct xb_softc *, int);
static void blkif_queue_cb(void *, bus_dma_segment_t *, int, int);
MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data");
#define GRANT_INVALID_REF 0
/* Control whether runtime update of vbds is enabled. */
@ -113,11 +119,6 @@ static char * blkif_status_name[] = {
#define DPRINTK(fmt, args...)
#endif
#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
#define BLKIF_MAXIO (32 * 1024)
static int blkif_open(struct disk *dp);
static int blkif_close(struct disk *dp);
static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
@ -202,8 +203,8 @@ blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name)
}
int
xlvbd_add(struct xb_softc *sc, blkif_sector_t capacity,
int vdevice, uint16_t vdisk_info, uint16_t sector_size)
xlvbd_add(struct xb_softc *sc, blkif_sector_t sectors,
int vdevice, uint16_t vdisk_info, unsigned long sector_size)
{
int unit, error = 0;
const char *name;
@ -215,7 +216,6 @@ xlvbd_add(struct xb_softc *sc, blkif_sector_t capacity,
if (strcmp(name, "xbd"))
device_printf(sc->xb_dev, "attaching as %s%d\n", name, unit);
memset(&sc->xb_disk, 0, sizeof(sc->xb_disk));
sc->xb_disk = disk_alloc();
sc->xb_disk->d_unit = sc->xb_unit;
sc->xb_disk->d_open = blkif_open;
@ -227,20 +227,14 @@ xlvbd_add(struct xb_softc *sc, blkif_sector_t capacity,
sc->xb_disk->d_drv1 = sc;
sc->xb_disk->d_sectorsize = sector_size;
sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT;
sc->xb_disk->d_maxsize = BLKIF_MAXIO;
sc->xb_disk->d_mediasize = sectors * sector_size;
sc->xb_disk->d_maxsize = sc->max_request_size;
sc->xb_disk->d_flags = 0;
disk_create(sc->xb_disk, DISK_VERSION_00);
return error;
}
void
xlvbd_del(struct xb_softc *sc)
{
disk_destroy(sc->xb_disk);
}
/************************ end VBD support *****************/
/*
@ -357,15 +351,16 @@ xb_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
return (EBUSY);
}
if (gnttab_alloc_grant_references(
BLKIF_MAX_SEGMENTS_PER_REQUEST, &cm->gref_head) < 0) {
if (gnttab_alloc_grant_references(sc->max_request_segments,
&cm->gref_head) != 0) {
xb_free_command(cm);
mtx_unlock(&sc->xb_io_lock);
device_printf(sc->xb_dev, "no more grant allocs?\n");
return (EBUSY);
}
chunk = length > BLKIF_MAXIO ? BLKIF_MAXIO : length;
chunk = length > sc->max_request_size
? sc->max_request_size : length;
cm->data = virtual;
cm->datalen = chunk;
cm->operation = BLKIF_OP_WRITE;
@ -423,16 +418,18 @@ static int
blkfront_attach(device_t dev)
{
struct xb_softc *sc;
struct xb_command *cm;
const char *name;
int error, vdevice, i, unit;
int error;
int vdevice;
int i;
int unit;
/* FIXME: Use dynamic device id if this is not set. */
error = xenbus_scanf(XBT_NIL, xenbus_get_node(dev),
error = xs_scanf(XST_NIL, xenbus_get_node(dev),
"virtual-device", NULL, "%i", &vdevice);
if (error) {
xenbus_dev_fatal(dev, error, "reading virtual-device");
printf("couldn't find virtual device");
device_printf(dev, "Couldn't determine virtual device.\n");
return (error);
}
@ -447,51 +444,18 @@ blkfront_attach(device_t dev)
xb_initq_ready(sc);
xb_initq_complete(sc);
xb_initq_bio(sc);
/* Allocate parent DMA tag */
if (bus_dma_tag_create( NULL, /* parent */
512, 4096, /* algnmnt, boundary */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
BLKIF_MAXIO, /* maxsize */
BLKIF_MAX_SEGMENTS_PER_REQUEST, /* nsegments */
PAGE_SIZE, /* maxsegsize */
BUS_DMA_ALLOCNOW, /* flags */
busdma_lock_mutex, /* lockfunc */
&sc->xb_io_lock, /* lockarg */
&sc->xb_io_dmat)) {
device_printf(dev, "Cannot allocate parent DMA tag\n");
return (ENOMEM);
}
#ifdef notyet
if (bus_dma_tag_set(sc->xb_io_dmat, BUS_DMA_SET_MINSEGSZ,
XBD_SECTOR_SIZE)) {
device_printf(dev, "Cannot set sector size\n");
return (EINVAL);
}
#endif
for (i = 0; i < XBF_MAX_RING_PAGES; i++)
sc->ring_ref[i] = GRANT_INVALID_REF;
sc->xb_dev = dev;
sc->vdevice = vdevice;
sc->connected = BLKIF_STATE_DISCONNECTED;
/* work queue needed ? */
for (i = 0; i < BLK_RING_SIZE; i++) {
cm = &sc->shadow[i];
cm->req.id = i;
cm->cm_sc = sc;
if (bus_dmamap_create(sc->xb_io_dmat, 0, &cm->map) != 0)
break;
xb_free_command(cm);
}
/* Front end dir is a number, which is used as the id. */
sc->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0);
error = talk_to_backend(sc);
if (error)
return (error);
/* Wait for backend device to publish its protocol capabilities. */
xenbus_set_state(dev, XenbusStateInitialising);
return (0);
}
@ -512,121 +476,265 @@ blkfront_suspend(device_t dev)
static int
blkfront_resume(device_t dev)
{
#if 0
struct xb_softc *sc = device_get_softc(dev);
int err;
DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));
/* XXX This can't work!!! */
blkif_free(sc, 1);
err = talk_to_backend(sc);
if (sc->connected == BLKIF_STATE_SUSPENDED && !err)
blkfront_initialize(sc);
if (sc->connected == BLKIF_STATE_SUSPENDED)
blkif_recover(sc);
return (err);
#endif
return (0);
}
/* Common code used when first setting up, and when resuming. */
static int
talk_to_backend(struct xb_softc *sc)
static void
blkfront_initialize(struct xb_softc *sc)
{
device_t dev;
struct xenbus_transaction xbt;
const char *message = NULL;
int err;
const char *otherend_path;
const char *node_path;
int error;
int i;
/* Create shared ring, alloc event channel. */
dev = sc->xb_dev;
err = setup_blkring(sc);
if (err)
goto out;
if (xenbus_get_state(sc->xb_dev) != XenbusStateInitialising)
return;
again:
err = xenbus_transaction_start(&xbt);
if (err) {
xenbus_dev_fatal(dev, err, "starting transaction");
goto destroy_blkring;
/*
* Protocol defaults valid even if negotiation for a
* setting fails.
*/
sc->ring_pages = 1;
sc->max_requests = BLKIF_MAX_RING_REQUESTS(PAGE_SIZE);
sc->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK;
sc->max_request_size = sc->max_request_segments * PAGE_SIZE;
sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments);
/*
* Protocol negotiation.
*
* \note xs_gather() returns on the first encountered error, so
* we must use independent calls in order to guarantee
* we don't miss information in a sparsely populated back-end
* tree.
*/
otherend_path = xenbus_get_otherend_path(sc->xb_dev);
node_path = xenbus_get_node(sc->xb_dev);
(void)xs_scanf(XST_NIL, otherend_path,
"max-ring-pages", NULL, "%" PRIu32,
&sc->ring_pages);
(void)xs_scanf(XST_NIL, otherend_path,
"max-requests", NULL, "%" PRIu32,
&sc->max_requests);
(void)xs_scanf(XST_NIL, otherend_path,
"max-request-segments", NULL, "%" PRIu32,
&sc->max_request_segments);
(void)xs_scanf(XST_NIL, otherend_path,
"max-request-size", NULL, "%" PRIu32,
&sc->max_request_size);
if (sc->ring_pages > XBF_MAX_RING_PAGES) {
device_printf(sc->xb_dev, "Back-end specified ring-pages of "
"%u limited to front-end limit of %zu.\n",
sc->ring_pages, XBF_MAX_RING_PAGES);
sc->ring_pages = XBF_MAX_RING_PAGES;
}
err = xenbus_printf(xbt, xenbus_get_node(dev),
"ring-ref","%u", sc->ring_ref);
if (err) {
message = "writing ring-ref";
goto abort_transaction;
}
err = xenbus_printf(xbt, xenbus_get_node(dev),
"event-channel", "%u", irq_to_evtchn_port(sc->irq));
if (err) {
message = "writing event-channel";
goto abort_transaction;
}
err = xenbus_printf(xbt, xenbus_get_node(dev),
"protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
if (err) {
message = "writing protocol";
goto abort_transaction;
if (sc->max_requests > XBF_MAX_REQUESTS) {
device_printf(sc->xb_dev, "Back-end specified max_requests of "
"%u limited to front-end limit of %u.\n",
sc->max_requests, XBF_MAX_REQUESTS);
sc->max_requests = XBF_MAX_REQUESTS;
}
err = xenbus_transaction_end(xbt, 0);
if (err) {
if (err == EAGAIN)
goto again;
xenbus_dev_fatal(dev, err, "completing transaction");
goto destroy_blkring;
if (sc->max_request_segments > XBF_MAX_SEGMENTS_PER_REQUEST) {
device_printf(sc->xb_dev, "Back-end specificed "
"max_requests_segments of %u limited to "
"front-end limit of %u.\n",
sc->max_request_segments,
XBF_MAX_SEGMENTS_PER_REQUEST);
sc->max_request_segments = XBF_MAX_SEGMENTS_PER_REQUEST;
}
xenbus_set_state(dev, XenbusStateInitialised);
return 0;
abort_transaction:
xenbus_transaction_end(xbt, 1);
if (message)
xenbus_dev_fatal(dev, err, "%s", message);
destroy_blkring:
blkif_free(sc, 0);
out:
return err;
if (sc->max_request_size > XBF_MAX_REQUEST_SIZE) {
device_printf(sc->xb_dev, "Back-end specificed "
"max_request_size of %u limited to front-end "
"limit of %u.\n", sc->max_request_size,
XBF_MAX_REQUEST_SIZE);
sc->max_request_size = XBF_MAX_REQUEST_SIZE;
}
sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments);
/* Allocate datastructures based on negotiated values. */
error = bus_dma_tag_create(NULL, /* parent */
512, PAGE_SIZE, /* algnmnt, boundary */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
sc->max_request_size,
sc->max_request_segments,
PAGE_SIZE, /* maxsegsize */
BUS_DMA_ALLOCNOW, /* flags */
busdma_lock_mutex, /* lockfunc */
&sc->xb_io_lock, /* lockarg */
&sc->xb_io_dmat);
if (error != 0) {
xenbus_dev_fatal(sc->xb_dev, error,
"Cannot allocate parent DMA tag\n");
return;
}
/* Per-transaction data allocation. */
sc->shadow = malloc(sizeof(*sc->shadow) * sc->max_requests,
M_XENBLOCKFRONT, M_NOWAIT|M_ZERO);
if (sc->shadow == NULL) {
xenbus_dev_fatal(sc->xb_dev, error,
"Cannot allocate request structures\n");
}
for (i = 0; i < sc->max_requests; i++) {
struct xb_command *cm;
cm = &sc->shadow[i];
cm->sg_refs = malloc(sizeof(grant_ref_t)
* sc->max_request_segments,
M_XENBLOCKFRONT, M_NOWAIT);
if (cm->sg_refs == NULL)
break;
cm->id = i;
cm->cm_sc = sc;
if (bus_dmamap_create(sc->xb_io_dmat, 0, &cm->map) != 0)
break;
xb_free_command(cm);
}
if (setup_blkring(sc) != 0)
return;
error = xs_printf(XST_NIL, node_path,
"ring-pages","%u", sc->ring_pages);
if (error) {
xenbus_dev_fatal(sc->xb_dev, error,
"writing %s/ring-pages",
node_path);
return;
}
error = xs_printf(XST_NIL, node_path,
"max-requests","%u", sc->max_requests);
if (error) {
xenbus_dev_fatal(sc->xb_dev, error,
"writing %s/max-requests",
node_path);
return;
}
error = xs_printf(XST_NIL, node_path,
"max-request-segments","%u", sc->max_request_segments);
if (error) {
xenbus_dev_fatal(sc->xb_dev, error,
"writing %s/max-request-segments",
node_path);
return;
}
error = xs_printf(XST_NIL, node_path,
"max-request-size","%u", sc->max_request_size);
if (error) {
xenbus_dev_fatal(sc->xb_dev, error,
"writing %s/max-request-size",
node_path);
return;
}
error = xs_printf(XST_NIL, node_path, "event-channel",
"%u", irq_to_evtchn_port(sc->irq));
if (error) {
xenbus_dev_fatal(sc->xb_dev, error,
"writing %s/event-channel",
node_path);
return;
}
error = xs_printf(XST_NIL, node_path,
"protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
if (error) {
xenbus_dev_fatal(sc->xb_dev, error,
"writing %s/protocol",
node_path);
return;
}
xenbus_set_state(sc->xb_dev, XenbusStateInitialised);
}
static int
setup_blkring(struct xb_softc *sc)
{
blkif_sring_t *sring;
uintptr_t sring_page_addr;
int error;
int i;
sc->ring_ref = GRANT_INVALID_REF;
sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
sring = malloc(sc->ring_pages * PAGE_SIZE, M_XENBLOCKFRONT,
M_NOWAIT|M_ZERO);
if (sring == NULL) {
xenbus_dev_fatal(sc->xb_dev, ENOMEM, "allocating shared ring");
return ENOMEM;
return (ENOMEM);
}
SHARED_RING_INIT(sring);
FRONT_RING_INIT(&sc->ring, sring, PAGE_SIZE);
FRONT_RING_INIT(&sc->ring, sring, sc->ring_pages * PAGE_SIZE);
error = xenbus_grant_ring(sc->xb_dev,
(vtomach(sc->ring.sring) >> PAGE_SHIFT), &sc->ring_ref);
if (error) {
free(sring, M_DEVBUF);
sc->ring.sring = NULL;
goto fail;
for (i = 0, sring_page_addr = (uintptr_t)sring;
i < sc->ring_pages;
i++, sring_page_addr += PAGE_SIZE) {
error = xenbus_grant_ring(sc->xb_dev,
(vtomach(sring_page_addr) >> PAGE_SHIFT), &sc->ring_ref[i]);
if (error) {
xenbus_dev_fatal(sc->xb_dev, error,
"granting ring_ref(%d)", i);
return (error);
}
}
error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(sc->xb_dev),
error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
"ring-ref","%u", sc->ring_ref[0]);
if (error) {
xenbus_dev_fatal(sc->xb_dev, error, "writing %s/ring-ref",
xenbus_get_node(sc->xb_dev));
return (error);
}
for (i = 1; i < sc->ring_pages; i++) {
char ring_ref_name[]= "ring_refXX";
snprintf(ring_ref_name, sizeof(ring_ref_name), "ring-ref%u", i);
error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
ring_ref_name, "%u", sc->ring_ref[i]);
if (error) {
xenbus_dev_fatal(sc->xb_dev, error, "writing %s/%s",
xenbus_get_node(sc->xb_dev),
ring_ref_name);
return (error);
}
}
error = bind_listening_port_to_irqhandler(
xenbus_get_otherend_id(sc->xb_dev),
"xbd", (driver_intr_t *)blkif_int, sc,
INTR_TYPE_BIO | INTR_MPSAFE, &sc->irq);
if (error) {
xenbus_dev_fatal(sc->xb_dev, error,
"bind_evtchn_to_irqhandler failed");
goto fail;
return (error);
}
return (0);
fail:
blkif_free(sc, 0);
return (error);
}
/**
* Callback received when the backend's state changes.
*/
@ -640,15 +748,19 @@ blkfront_backend_changed(device_t dev, XenbusState backend_state)
switch (backend_state) {
case XenbusStateUnknown:
case XenbusStateInitialising:
case XenbusStateInitWait:
case XenbusStateInitialised:
case XenbusStateClosed:
case XenbusStateReconfigured:
case XenbusStateReconfiguring:
case XenbusStateClosed:
break;
case XenbusStateInitWait:
blkfront_initialize(sc);
break;
case XenbusStateInitialised:
case XenbusStateConnected:
connect(sc);
blkfront_initialize(sc);
blkfront_connect(sc);
break;
case XenbusStateClosing:
@ -657,20 +769,7 @@ blkfront_backend_changed(device_t dev, XenbusState backend_state)
"Device in use; refusing to close");
else
blkfront_closing(dev);
#ifdef notyet
bd = bdget(sc->dev);
if (bd == NULL)
xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
down(&bd->bd_sem);
if (sc->users > 0)
xenbus_dev_error(dev, -EBUSY,
"Device in use; refusing to close");
else
blkfront_closing(dev);
up(&bd->bd_sem);
bdput(bd);
#endif
break;
}
return (0);
@ -681,7 +780,7 @@ blkfront_backend_changed(device_t dev, XenbusState backend_state)
** the details about the physical device - #sectors, size, etc).
*/
static void
connect(struct xb_softc *sc)
blkfront_connect(struct xb_softc *sc)
{
device_t dev = sc->xb_dev;
unsigned long sectors, sector_size;
@ -694,20 +793,20 @@ connect(struct xb_softc *sc)
DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev));
err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev),
"sectors", "%lu", &sectors,
"info", "%u", &binfo,
"sector-size", "%lu", &sector_size,
NULL);
err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
"sectors", "%lu", &sectors,
"info", "%u", &binfo,
"sector-size", "%lu", &sector_size,
NULL);
if (err) {
xenbus_dev_fatal(dev, err,
"reading backend fields at %s",
xenbus_get_otherend_path(dev));
return;
}
err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev),
"feature-barrier", "%lu", &feature_barrier,
NULL);
err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
"feature-barrier", "%lu", &feature_barrier,
NULL);
if (!err || feature_barrier)
sc->xb_flags |= XB_BARRIER;
@ -741,15 +840,16 @@ blkfront_closing(device_t dev)
{
struct xb_softc *sc = device_get_softc(dev);
xenbus_set_state(dev, XenbusStateClosing);
DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev));
if (sc->mi) {
DPRINTK("Calling xlvbd_del\n");
xlvbd_del(sc);
sc->mi = NULL;
if (sc->xb_disk != NULL) {
disk_destroy(sc->xb_disk);
sc->xb_disk = NULL;
}
xenbus_set_state(dev, XenbusStateClosed);
xenbus_set_state(dev, XenbusStateClosed);
}
@ -778,11 +878,16 @@ flush_requests(struct xb_softc *sc)
notify_remote_via_irq(sc->irq);
}
static void blkif_restart_queue_callback(void *arg)
static void
blkif_restart_queue_callback(void *arg)
{
struct xb_softc *sc = arg;
mtx_lock(&sc->xb_io_lock);
xb_startio(sc);
mtx_unlock(&sc->xb_io_lock);
}
static int
@ -874,20 +979,17 @@ xb_bio_command(struct xb_softc *sc)
return (NULL);
}
if (gnttab_alloc_grant_references(BLKIF_MAX_SEGMENTS_PER_REQUEST,
&cm->gref_head) < 0) {
if (gnttab_alloc_grant_references(sc->max_request_segments,
&cm->gref_head) != 0) {
gnttab_request_free_callback(&sc->callback,
blkif_restart_queue_callback, sc,
BLKIF_MAX_SEGMENTS_PER_REQUEST);
sc->max_request_segments);
xb_requeue_bio(sc, bp);
xb_enqueue_free(cm);
sc->xb_flags |= XB_FROZEN;
return (NULL);
}
/* XXX Can we grab refs before doing the load so that the ref can
* be filled out here?
*/
cm->bp = bp;
cm->data = bp->bio_data;
cm->datalen = bp->bio_bcount;
@ -921,13 +1023,19 @@ blkif_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
struct xb_softc *sc;
struct xb_command *cm;
blkif_request_t *ring_req;
struct blkif_request_segment *sg;
struct blkif_request_segment *last_block_sg;
grant_ref_t *sg_ref;
vm_paddr_t buffer_ma;
uint64_t fsect, lsect;
int ref, i, op;
int ref;
int op;
int block_segs;
cm = arg;
sc = cm->cm_sc;
//printf("%s: Start\n", __func__);
if (error) {
printf("error %d in blkif_queue_cb\n", error);
cm->bp->bio_error = EIO;
@ -938,43 +1046,62 @@ blkif_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
/* Fill out a communications ring structure. */
ring_req = RING_GET_REQUEST(&sc->ring, sc->ring.req_prod_pvt);
if (ring_req == NULL) {
/* XXX Is this possible? */
printf("ring_req NULL, requeuing\n");
xb_enqueue_ready(cm);
return;
}
ring_req->id = cm->req.id;
sc->ring.req_prod_pvt++;
ring_req->id = cm->id;
ring_req->operation = cm->operation;
ring_req->sector_number = cm->sector_number;
ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk;
ring_req->nr_segments = nsegs;
cm->nseg = nsegs;
for (i = 0; i < nsegs; i++) {
buffer_ma = segs[i].ds_addr;
fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
lsect = fsect + (segs[i].ds_len >> XBD_SECTOR_SHFT) - 1;
block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK);
sg = ring_req->seg;
last_block_sg = sg + block_segs;
sg_ref = cm->sg_refs;
KASSERT(lsect <= 7,
("XEN disk driver data cannot cross a page boundary"));
while (1) {
/* install a grant reference. */
ref = gnttab_claim_grant_reference(&cm->gref_head);
KASSERT( ref >= 0, ("grant_reference failed") );
while (sg < last_block_sg) {
buffer_ma = segs->ds_addr;
fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
lsect = fsect + (segs->ds_len >> XBD_SECTOR_SHFT) - 1;
gnttab_grant_foreign_access_ref(
ref,
xenbus_get_otherend_id(sc->xb_dev),
buffer_ma >> PAGE_SHIFT,
ring_req->operation & 1 ); /* ??? */
KASSERT(lsect <= 7, ("XEN disk driver data cannot "
"cross a page boundary"));
ring_req->seg[i] =
(struct blkif_request_segment) {
/* install a grant reference. */
ref = gnttab_claim_grant_reference(&cm->gref_head);
/*
* GNTTAB_LIST_END == 0xffffffff, but it is private
* to gnttab.c.
*/
KASSERT(ref != ~0, ("grant_reference failed"));
gnttab_grant_foreign_access_ref(
ref,
xenbus_get_otherend_id(sc->xb_dev),
buffer_ma >> PAGE_SHIFT,
ring_req->operation == BLKIF_OP_WRITE);
*sg_ref = ref;
*sg = (struct blkif_request_segment) {
.gref = ref,
.first_sect = fsect,
.last_sect = lsect };
}
sg++;
sg_ref++;
segs++;
nsegs--;
}
block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK);
if (block_segs == 0)
break;
sg = BLKRING_GET_SG_REQUEST(&sc->ring, sc->ring.req_prod_pvt);
sc->ring.req_prod_pvt++;
last_block_sg = sg + block_segs;
}
if (cm->operation == BLKIF_OP_READ)
op = BUS_DMASYNC_PREREAD;
@ -984,15 +1111,10 @@ blkif_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
op = 0;
bus_dmamap_sync(sc->xb_io_dmat, cm->map, op);
sc->ring.req_prod_pvt++;
/* Keep a private copy so we can reissue requests when recovering. */
cm->req = *ring_req;
gnttab_free_grant_references(cm->gref_head);
xb_enqueue_busy(cm);
gnttab_free_grant_references(cm->gref_head);
/*
* This flag means that we're probably executing in the busdma swi
* instead of in the startio context, so an explicit flush is needed.
@ -1000,6 +1122,7 @@ blkif_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
if (cm->cm_flags & XB_CMD_FROZEN)
flush_requests(sc);
//printf("%s: Done\n", __func__);
return;
}
@ -1018,7 +1141,7 @@ xb_startio(struct xb_softc *sc)
mtx_assert(&sc->xb_io_lock, MA_OWNED);
while (!RING_FULL(&sc->ring)) {
while (RING_FREE_REQUESTS(&sc->ring) >= sc->max_request_blocks) {
if (sc->xb_flags & XB_FROZEN)
break;
@ -1061,12 +1184,12 @@ blkif_int(void *xsc)
rp = sc->ring.sring->rsp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
for (i = sc->ring.rsp_cons; i != rp; i++) {
for (i = sc->ring.rsp_cons; i != rp;) {
bret = RING_GET_RESPONSE(&sc->ring, i);
cm = &sc->shadow[bret->id];
xb_remove_busy(cm);
blkif_completion(cm);
i += blkif_completion(cm);
if (cm->operation == BLKIF_OP_READ)
op = BUS_DMASYNC_POSTREAD;
@ -1116,35 +1239,61 @@ blkif_int(void *xsc)
static void
blkif_free(struct xb_softc *sc, int suspend)
{
uint8_t *sring_page_ptr;
int i;
/* Prevent new requests being issued until we fix things up. */
/* Prevent new requests being issued until we fix things up. */
mtx_lock(&sc->xb_io_lock);
sc->connected = suspend ?
BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
mtx_unlock(&sc->xb_io_lock);
/* Free resources associated with old device channel. */
if (sc->ring_ref != GRANT_INVALID_REF) {
gnttab_end_foreign_access(sc->ring_ref,
sc->ring.sring);
sc->ring_ref = GRANT_INVALID_REF;
if (sc->ring.sring != NULL) {
sring_page_ptr = (uint8_t *)sc->ring.sring;
for (i = 0; i < sc->ring_pages; i++) {
if (sc->ring_ref[i] != GRANT_INVALID_REF) {
gnttab_end_foreign_access_ref(sc->ring_ref[i]);
sc->ring_ref[i] = GRANT_INVALID_REF;
}
sring_page_ptr += PAGE_SIZE;
}
free(sc->ring.sring, M_XENBLOCKFRONT);
sc->ring.sring = NULL;
}
if (sc->irq)
unbind_from_irqhandler(sc->irq);
sc->irq = 0;
if (sc->shadow) {
for (i = 0; i < sc->max_requests; i++) {
struct xb_command *cm;
cm = &sc->shadow[i];
if (cm->sg_refs != NULL) {
free(cm->sg_refs, M_XENBLOCKFRONT);
cm->sg_refs = NULL;
}
bus_dmamap_destroy(sc->xb_io_dmat, cm->map);
}
free(sc->shadow, M_XENBLOCKFRONT);
sc->shadow = NULL;
}
if (sc->irq) {
unbind_from_irqhandler(sc->irq);
sc->irq = 0;
}
}
static void
static int
blkif_completion(struct xb_command *s)
{
int i;
for (i = 0; i < s->req.nr_segments; i++)
gnttab_end_foreign_access(s->req.seg[i].gref, 0UL);
//printf("%s: Req %p(%d)\n", __func__, s, s->nseg);
gnttab_end_foreign_access_references(s->nseg, s->sg_refs);
return (BLKIF_SEGS_TO_BLOCKS(s->nseg));
}
#if 0
static void
blkif_recover(struct xb_softc *sc)
{
@ -1157,6 +1306,7 @@ blkif_recover(struct xb_softc *sc)
* has been removed until further notice.
*/
}
#endif
/* ** Driver registration ** */
static device_method_t blkfront_methods[] = {
@ -1169,7 +1319,7 @@ static device_method_t blkfront_methods[] = {
DEVMETHOD(device_resume, blkfront_resume),
/* Xenbus interface */
DEVMETHOD(xenbus_backend_changed, blkfront_backend_changed),
DEVMETHOD(xenbus_otherend_changed, blkfront_backend_changed),
{ 0, 0 }
};
@ -1181,4 +1331,4 @@ static driver_t blkfront_driver = {
};
devclass_t blkfront_devclass;
DRIVER_MODULE(xbd, xenbus, blkfront_driver, blkfront_devclass, 0, 0);
DRIVER_MODULE(xbd, xenbusb_front, blkfront_driver, blkfront_devclass, 0, 0);
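The heart of the new blkfront_initialize() above is a negotiate/clamp/publish cycle per parameter: seed a safe default, read the back-end's advertised limit, clamp it to the front-end's compile-time maximum, then publish the result for the back-end to honor. A sketch of one such cycle in isolation (negotiate_max_requests is a hypothetical helper; the xs_scanf/xs_printf calls mirror the hunks above):

static void
negotiate_max_requests(struct xb_softc *sc)
{
	const char *otherend_path = xenbus_get_otherend_path(sc->xb_dev);
	const char *node_path = xenbus_get_node(sc->xb_dev);
	int error;

	/* Default survives even if the back-end omits the key. */
	sc->max_requests = BLKIF_MAX_RING_REQUESTS(PAGE_SIZE);

	/* Read the back-end's advertised limit; on error keep the default. */
	(void)xs_scanf(XST_NIL, otherend_path, "max-requests", NULL,
	    "%" PRIu32, &sc->max_requests);

	/* Clamp to what this front-end can actually handle. */
	if (sc->max_requests > XBF_MAX_REQUESTS)
		sc->max_requests = XBF_MAX_REQUESTS;

	/* Publish the negotiated value for the back-end to honor. */
	error = xs_printf(XST_NIL, node_path, "max-requests", "%u",
	    sc->max_requests);
	if (error)
		xenbus_dev_fatal(sc->xb_dev, error,
		    "writing %s/max-requests", node_path);
}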

View File

@ -32,7 +32,43 @@
#ifndef __XEN_DRIVERS_BLOCK_H__
#define __XEN_DRIVERS_BLOCK_H__
#include <xen/interface/io/blkif.h>
#include <xen/blkif.h>
/**
* The maximum number of outstanding request blocks (request headers plus
* additional segment blocks) we will allow in a negotiated block-front/back
* communication channel.
*/
#define XBF_MAX_REQUESTS 256
/**
* The maximum mapped region size per request we will allow in a negotiated
* block-front/back communication channel.
*
* \note We reserve a segment from the maximum supported by the transport to
* guarantee we can handle an unaligned transfer without the need to
* use a bounce buffer.
*/
#define XBF_MAX_REQUEST_SIZE \
MIN(MAXPHYS, (BLKIF_MAX_SEGMENTS_PER_REQUEST - 1) * PAGE_SIZE)
/**
* The maximum number of segments (within a request header and accompanying
* segment blocks) per request we will allow in a negotiated block-front/back
* communication channel.
*/
#define XBF_MAX_SEGMENTS_PER_REQUEST \
(MIN(BLKIF_MAX_SEGMENTS_PER_REQUEST, \
(XBF_MAX_REQUEST_SIZE / PAGE_SIZE) + 1))
/**
* The maximum number of shared memory ring pages we will allow in a
* negotiated block-front/back communication channel. Allow enough
* ring space for all requests to be XBF_MAX_REQUEST_SIZE'd.
*/
#define XBF_MAX_RING_PAGES \
BLKIF_RING_PAGES(BLKIF_SEGS_TO_BLOCKS(XBF_MAX_SEGMENTS_PER_REQUEST) \
* XBF_MAX_REQUESTS)
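To make the limit arithmetic concrete, here is a worked example under assumed constants (PAGE_SIZE = 4096, MAXPHYS = 131072, and a transport supporting at least 33 segments per request); the real values come from the machine headers and <xen/blkif.h>:

/*
 * Worked example (assumed: PAGE_SIZE = 4096, MAXPHYS = 131072,
 * BLKIF_MAX_SEGMENTS_PER_REQUEST >= 33):
 *
 * XBF_MAX_REQUEST_SIZE
 *     = MIN(131072, (BLKIF_MAX_SEGMENTS_PER_REQUEST - 1) * 4096)
 *     = 131072 bytes (32 pages)
 *
 * XBF_MAX_SEGMENTS_PER_REQUEST
 *     = MIN(BLKIF_MAX_SEGMENTS_PER_REQUEST, 131072 / 4096 + 1)
 *     = 33 segments
 *
 * The 33rd segment is the one reserved above so that a 128 KiB
 * transfer starting mid-page still fits without a bounce buffer:
 * 32 pages of data can span 33 page-aligned segments.
 */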
struct xlbd_type_info
{
@ -62,19 +98,19 @@ struct xb_command {
#define XB_ON_XBQ_COMPLETE (1<<5)
#define XB_ON_XBQ_MASK ((1<<2)|(1<<3)|(1<<4)|(1<<5))
bus_dmamap_t map;
blkif_request_t req;
uint64_t id;
grant_ref_t *sg_refs;
struct bio *bp;
grant_ref_t gref_head;
void *data;
size_t datalen;
u_int nseg;
int operation;
blkif_sector_t sector_number;
int status;
void (* cm_complete)(struct xb_command *);
};
#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
#define XBQ_FREE 0
#define XBQ_BIO 1
#define XBQ_READY 2
@ -108,10 +144,14 @@ struct xb_softc {
int vdevice;
blkif_vdev_t handle;
int connected;
int ring_ref;
u_int ring_pages;
uint32_t max_requests;
uint32_t max_request_segments;
uint32_t max_request_blocks;
uint32_t max_request_size;
grant_ref_t ring_ref[XBF_MAX_RING_PAGES];
blkif_front_ring_t ring;
unsigned int irq;
struct xlbd_major_info *mi;
struct gnttab_free_callback callback;
TAILQ_HEAD(,xb_command) cm_free;
TAILQ_HEAD(,xb_command) cm_ready;
@ -126,11 +166,12 @@ struct xb_softc {
*/
int users;
struct mtx xb_io_lock;
struct xb_command shadow[BLK_RING_SIZE];
struct xb_command *shadow;
};
int xlvbd_add(struct xb_softc *, blkif_sector_t capacity, int device,
uint16_t vdisk_info, uint16_t sector_size);
int xlvbd_add(struct xb_softc *, blkif_sector_t sectors, int device,
uint16_t vdisk_info, unsigned long sector_size);
void xlvbd_del(struct xb_softc *);
#define XBQ_ADD(sc, qname) \
@ -188,7 +229,8 @@ void xlvbd_del(struct xb_softc *);
struct xb_command *cm; \
\
if ((cm = TAILQ_FIRST(&sc->cm_ ## name)) != NULL) { \
if ((cm->cm_flags & XB_ON_ ## index) == 0) { \
if ((cm->cm_flags & XB_ON_XBQ_MASK) != \
XB_ON_ ## index) { \
printf("command %p not in queue, " \
"flags = %#x, bit = %#x\n", cm, \
cm->cm_flags, XB_ON_ ## index); \
@ -203,7 +245,7 @@ void xlvbd_del(struct xb_softc *);
static __inline void \
xb_remove_ ## name (struct xb_command *cm) \
{ \
if ((cm->cm_flags & XB_ON_ ## index) == 0) { \
if ((cm->cm_flags & XB_ON_XBQ_MASK) != XB_ON_ ## index){\
printf("command %p not in queue, flags = %#x, " \
"bit = %#x\n", cm, cm->cm_flags, \
XB_ON_ ## index); \

View File

@ -0,0 +1,493 @@
/*-
* Copyright (c) 2010 Justin T. Gibbs, Spectra Logic Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions, and the following disclaimer,
* without modification.
* 2. Redistributions in binary form must reproduce at minimum a disclaimer
* substantially similar to the "NO WARRANTY" disclaimer below
* ("Disclaimer") and any redistribution must be conditioned upon
* including a substantially similar Disclaimer requirement for further
* binary redistribution.
*
* NO WARRANTY
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGES.
*/
/*-
* PV suspend/resume support:
*
* Copyright (c) 2004 Christian Limpach.
* Copyright (c) 2004-2006,2008 Kip Macy
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Christian Limpach.
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*-
* HVM suspend/resume support:
*
* Copyright (c) 2008 Citrix Systems, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/**
* \file control.c
*
* \brief Device driver to respond to control domain events that impact
* this VM.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/bio.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/disk.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/kdb.h>
#include <sys/module.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/types.h>
#include <sys/vnode.h>
#ifndef XENHVM
#include <sys/sched.h>
#include <sys/smp.h>
#endif
#include <geom/geom.h>
#include <machine/_inttypes.h>
#include <machine/xen/xen-os.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <xen/blkif.h>
#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/xen_intr.h>
#include <xen/interface/event_channel.h>
#include <xen/interface/grant_table.h>
#include <xen/xenbus/xenbusvar.h>
#define NUM_ELEMENTS(x) (sizeof(x) / sizeof(*(x)))
/*--------------------------- Forward Declarations --------------------------*/
/** Function signature for shutdown event handlers. */
typedef void (xctrl_shutdown_handler_t)(void);
static xctrl_shutdown_handler_t xctrl_poweroff;
static xctrl_shutdown_handler_t xctrl_reboot;
static xctrl_shutdown_handler_t xctrl_suspend;
static xctrl_shutdown_handler_t xctrl_crash;
static xctrl_shutdown_handler_t xctrl_halt;
/*-------------------------- Private Data Structures -------------------------*/
/** Element type for lookup table of event name to handler. */
struct xctrl_shutdown_reason {
const char *name;
xctrl_shutdown_handler_t *handler;
};
/** Lookup table for shutdown event name to handler. */
static struct xctrl_shutdown_reason xctrl_shutdown_reasons[] = {
{ "poweroff", xctrl_poweroff },
{ "reboot", xctrl_reboot },
{ "suspend", xctrl_suspend },
{ "crash", xctrl_crash },
{ "halt", xctrl_halt },
};
struct xctrl_softc {
/** Must be first */
struct xs_watch xctrl_watch;
};
/*------------------------------ Event Handlers ------------------------------*/
static void
xctrl_poweroff()
{
shutdown_nice(RB_POWEROFF|RB_HALT);
}
static void
xctrl_reboot()
{
shutdown_nice(0);
}
#ifndef XENHVM
extern void xencons_suspend(void);
extern void xencons_resume(void);
/* Full PV mode suspension. */
static void
xctrl_suspend()
{
int i, j, k, fpp;
unsigned long max_pfn, start_info_mfn;
#ifdef SMP
cpumask_t map;
/*
* Bind us to CPU 0 and stop any other VCPUs.
*/
thread_lock(curthread);
sched_bind(curthread, 0);
thread_unlock(curthread);
KASSERT(PCPU_GET(cpuid) == 0, ("xen_suspend: not running on cpu 0"));
map = PCPU_GET(other_cpus) & ~stopped_cpus;
if (map)
stop_cpus(map);
#endif
if (DEVICE_SUSPEND(root_bus) != 0) {
printf("xen_suspend: device_suspend failed\n");
#ifdef SMP
if (map)
restart_cpus(map);
#endif
return;
}
local_irq_disable();
xencons_suspend();
gnttab_suspend();
max_pfn = HYPERVISOR_shared_info->arch.max_pfn;
void *shared_info = HYPERVISOR_shared_info;
HYPERVISOR_shared_info = NULL;
pmap_kremove((vm_offset_t) shared_info);
PT_UPDATES_FLUSH();
xen_start_info->store_mfn = MFNTOPFN(xen_start_info->store_mfn);
xen_start_info->console.domU.mfn = MFNTOPFN(xen_start_info->console.domU.mfn);
/*
* We'll stop somewhere inside this hypercall. When it returns,
* we'll start resuming after the restore.
*/
start_info_mfn = VTOMFN(xen_start_info);
pmap_suspend();
HYPERVISOR_suspend(start_info_mfn);
pmap_resume();
pmap_kenter_ma((vm_offset_t) shared_info, xen_start_info->shared_info);
HYPERVISOR_shared_info = shared_info;
HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
VTOMFN(xen_pfn_to_mfn_frame_list_list);
fpp = PAGE_SIZE/sizeof(unsigned long);
for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) {
if ((j % fpp) == 0) {
k++;
xen_pfn_to_mfn_frame_list_list[k] =
VTOMFN(xen_pfn_to_mfn_frame_list[k]);
j = 0;
}
xen_pfn_to_mfn_frame_list[k][j] =
VTOMFN(&xen_phys_machine[i]);
}
HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
gnttab_resume();
irq_resume();
local_irq_enable();
xencons_resume();
#ifdef CONFIG_SMP
for_each_cpu(i)
vcpu_prepare(i);
#endif
/*
* Only resume xenbus /after/ we've prepared our VCPUs; otherwise
* the VCPU hotplug callback can race with our vcpu_prepare.
*/
DEVICE_RESUME(root_bus);
#ifdef SMP
thread_lock(curthread);
sched_unbind(curthread);
thread_unlock(curthread);
if (map)
restart_cpus(map);
#endif
}
static void
xen_pv_shutdown_final(void *arg, int howto)
{
/*
* Inform the hypervisor that shutdown is complete.
* This is not necessary in HVM domains since Xen
* emulates ACPI in that mode and FreeBSD's ACPI
* support will request this transition.
*/
if (howto & (RB_HALT | RB_POWEROFF))
HYPERVISOR_shutdown(SHUTDOWN_poweroff);
else
HYPERVISOR_shutdown(SHUTDOWN_reboot);
}
#else
extern void xenpci_resume(void);
/* HVM mode suspension. */
static void
xctrl_suspend()
{
int suspend_cancelled;
if (DEVICE_SUSPEND(root_bus)) {
printf("xen_suspend: device_suspend failed\n");
return;
}
/*
* Make sure we don't change cpus or switch to some other
* thread for the duration.
*/
critical_enter();
/*
* Prevent any races with evtchn_interrupt() handler.
*/
irq_suspend();
disable_intr();
suspend_cancelled = HYPERVISOR_suspend(0);
if (!suspend_cancelled)
xenpci_resume();
/*
* Re-enable interrupts and put the scheduler back to normal.
*/
enable_intr();
critical_exit();
/*
* FreeBSD really needs to add DEVICE_SUSPEND_CANCEL or
* similar.
*/
if (!suspend_cancelled)
DEVICE_RESUME(root_bus);
}
#endif
static void
xctrl_crash()
{
panic("Xen directed crash");
}
static void
xctrl_halt()
{
shutdown_nice(RB_HALT);
}
/*------------------------------ Event Reception -----------------------------*/
static void
xctrl_on_watch_event(struct xs_watch *watch, const char **vec, unsigned int len)
{
struct xctrl_shutdown_reason *reason;
struct xctrl_shutdown_reason *last_reason;
char *result;
int error;
int result_len;
error = xs_read(XST_NIL, "control", "shutdown",
&result_len, (void **)&result);
if (error != 0)
return;
reason = xctrl_shutdown_reasons;
last_reason = reason + NUM_ELEMENTS(xctrl_shutdown_reasons);
while (reason < last_reason) {
if (!strcmp(result, reason->name)) {
reason->handler();
break;
}
reason++;
}
free(result, M_XENSTORE);
}
/*------------------ Private Device Attachment Functions --------------------*/
/**
* \brief Identify instances of this device type in the system.
*
* \param driver The driver performing this identify action.
* \param parent The NewBus parent device for any devices this method adds.
*/
static void
xctrl_identify(driver_t *driver __unused, device_t parent)
{
/*
* A single device instance for our driver is always present
* in a system operating under Xen.
*/
BUS_ADD_CHILD(parent, 0, driver->name, 0);
}
/**
* \brief Probe for the existence of the Xen Control device
*
* \param dev NewBus device_t for this Xen control instance.
*
* \return Always returns 0 indicating success.
*/
static int
xctrl_probe(device_t dev)
{
device_set_desc(dev, "Xen Control Device");
return (0);
}
/**
* \brief Attach the Xen control device.
*
* \param dev NewBus device_t for this Xen control instance.
*
* \return On success, 0. Otherwise an errno value indicating the
* type of failure.
*/
static int
xctrl_attach(device_t dev)
{
struct xctrl_softc *xctrl;
xctrl = device_get_softc(dev);
/* Activate watch */
xctrl->xctrl_watch.node = "control/shutdown";
xctrl->xctrl_watch.callback = xctrl_on_watch_event;
xs_register_watch(&xctrl->xctrl_watch);
#ifndef XENHVM
EVENTHANDLER_REGISTER(shutdown_final, xen_pv_shutdown_final, NULL,
SHUTDOWN_PRI_LAST);
#endif
return (0);
}
/**
* \brief Detach the Xen control device.
*
* \param dev NewBus device_t for this Xen control device instance.
*
* \return On success, 0. Otherwise an errno value indicating the
* type of failure.
*/
static int
xctrl_detach(device_t dev)
{
struct xctrl_softc *xctrl;
xctrl = device_get_softc(dev);
/* Release watch */
xs_unregister_watch(&xctrl->xctrl_watch);
return (0);
}
/*-------------------- Private Device Attachment Data -----------------------*/
static device_method_t xctrl_methods[] = {
/* Device interface */
DEVMETHOD(device_identify, xctrl_identify),
DEVMETHOD(device_probe, xctrl_probe),
DEVMETHOD(device_attach, xctrl_attach),
DEVMETHOD(device_detach, xctrl_detach),
{ 0, 0 }
};
DEFINE_CLASS_0(xctrl, xctrl_driver, xctrl_methods, sizeof(struct xctrl_softc));
devclass_t xctrl_devclass;
DRIVER_MODULE(xctrl, xenstore, xctrl_driver, xctrl_devclass, 0, 0);

View File

@ -91,8 +91,6 @@ __FBSDID("$FreeBSD$");
#define XN_CSUM_FEATURES (CSUM_TCP | CSUM_UDP | CSUM_TSO)
#define GRANT_INVALID_REF 0
#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
@ -373,7 +371,8 @@ xennet_get_rx_ref(struct netfront_info *np, RING_IDX ri)
{
int i = xennet_rxidx(ri);
grant_ref_t ref = np->grant_rx_ref[i];
np->grant_rx_ref[i] = GRANT_INVALID_REF;
KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n"));
np->grant_rx_ref[i] = GRANT_REF_INVALID;
return ref;
}
@ -404,7 +403,7 @@ xen_net_read_mac(device_t dev, uint8_t mac[])
int error, i;
char *s, *e, *macstr;
error = xenbus_read(XBT_NIL, xenbus_get_node(dev), "mac", NULL,
error = xs_read(XST_NIL, xenbus_get_node(dev), "mac", NULL,
(void **) &macstr);
if (error)
return (error);
@ -413,12 +412,12 @@ xen_net_read_mac(device_t dev, uint8_t mac[])
for (i = 0; i < ETHER_ADDR_LEN; i++) {
mac[i] = strtoul(s, &e, 16);
if (s == e || (e[0] != ':' && e[0] != 0)) {
free(macstr, M_DEVBUF);
free(macstr, M_XENBUS);
return (ENOENT);
}
s = &e[1];
}
free(macstr, M_DEVBUF);
free(macstr, M_XENBUS);
return (0);
}
@ -483,7 +482,7 @@ static int
talk_to_backend(device_t dev, struct netfront_info *info)
{
const char *message;
struct xenbus_transaction xbt;
struct xs_transaction xst;
const char *node = xenbus_get_node(dev);
int err;
@ -499,54 +498,54 @@ talk_to_backend(device_t dev, struct netfront_info *info)
goto out;
again:
err = xenbus_transaction_start(&xbt);
err = xs_transaction_start(&xst);
if (err) {
xenbus_dev_fatal(dev, err, "starting transaction");
goto destroy_ring;
}
err = xenbus_printf(xbt, node, "tx-ring-ref","%u",
err = xs_printf(xst, node, "tx-ring-ref","%u",
info->tx_ring_ref);
if (err) {
message = "writing tx ring-ref";
goto abort_transaction;
}
err = xenbus_printf(xbt, node, "rx-ring-ref","%u",
err = xs_printf(xst, node, "rx-ring-ref","%u",
info->rx_ring_ref);
if (err) {
message = "writing rx ring-ref";
goto abort_transaction;
}
err = xenbus_printf(xbt, node,
err = xs_printf(xst, node,
"event-channel", "%u", irq_to_evtchn_port(info->irq));
if (err) {
message = "writing event-channel";
goto abort_transaction;
}
err = xenbus_printf(xbt, node, "request-rx-copy", "%u",
err = xs_printf(xst, node, "request-rx-copy", "%u",
info->copying_receiver);
if (err) {
message = "writing request-rx-copy";
goto abort_transaction;
}
err = xenbus_printf(xbt, node, "feature-rx-notify", "%d", 1);
err = xs_printf(xst, node, "feature-rx-notify", "%d", 1);
if (err) {
message = "writing feature-rx-notify";
goto abort_transaction;
}
err = xenbus_printf(xbt, node, "feature-sg", "%d", 1);
err = xs_printf(xst, node, "feature-sg", "%d", 1);
if (err) {
message = "writing feature-sg";
goto abort_transaction;
}
#if __FreeBSD_version >= 700000
err = xenbus_printf(xbt, node, "feature-gso-tcpv4", "%d", 1);
err = xs_printf(xst, node, "feature-gso-tcpv4", "%d", 1);
if (err) {
message = "writing feature-gso-tcpv4";
goto abort_transaction;
}
#endif
err = xenbus_transaction_end(xbt, 0);
err = xs_transaction_end(xst, 0);
if (err) {
if (err == EAGAIN)
goto again;
@ -557,7 +556,7 @@ talk_to_backend(device_t dev, struct netfront_info *info)
return 0;
abort_transaction:
xenbus_transaction_end(xbt, 1);
xs_transaction_end(xst, 1);
xenbus_dev_fatal(dev, err, "%s", message);
destroy_ring:
netif_free(info);
@ -576,8 +575,8 @@ setup_device(device_t dev, struct netfront_info *info)
ifp = info->xn_ifp;
info->tx_ring_ref = GRANT_INVALID_REF;
info->rx_ring_ref = GRANT_INVALID_REF;
info->tx_ring_ref = GRANT_REF_INVALID;
info->rx_ring_ref = GRANT_REF_INVALID;
info->rx.sring = NULL;
info->tx.sring = NULL;
info->irq = 0;
@ -750,7 +749,7 @@ netif_release_tx_bufs(struct netfront_info *np)
GNTMAP_readonly);
gnttab_release_grant_reference(&np->gref_tx_head,
np->grant_tx_ref[i]);
np->grant_tx_ref[i] = GRANT_INVALID_REF;
np->grant_tx_ref[i] = GRANT_REF_INVALID;
add_id_to_freelist(np->tx_mbufs, i);
np->xn_cdata.xn_tx_chain_cnt--;
if (np->xn_cdata.xn_tx_chain_cnt < 0) {
@ -854,7 +853,8 @@ refill:
sc->rx_mbufs[id] = m_new;
ref = gnttab_claim_grant_reference(&sc->gref_rx_head);
KASSERT((short)ref >= 0, ("negative ref"));
KASSERT(ref != GNTTAB_LIST_END,
("reserved grant references exhuasted"));
sc->grant_rx_ref[id] = ref;
vaddr = mtod(m_new, vm_offset_t);
@ -1135,7 +1135,7 @@ xn_txeof(struct netfront_info *np)
np->grant_tx_ref[id]);
gnttab_release_grant_reference(
&np->gref_tx_head, np->grant_tx_ref[id]);
np->grant_tx_ref[id] = GRANT_INVALID_REF;
np->grant_tx_ref[id] = GRANT_REF_INVALID;
np->tx_mbufs[id] = NULL;
add_id_to_freelist(np->tx_mbufs, id);
@ -1318,12 +1318,13 @@ xennet_get_responses(struct netfront_info *np,
* the backend driver. In future this should flag the bad
* situation to the system controller to reboot the backend.
*/
if (ref == GRANT_INVALID_REF) {
if (ref == GRANT_REF_INVALID) {
#if 0
if (net_ratelimit())
WPRINTK("Bad rx response id %d.\n", rx->id);
#endif
printf("%s: Bad rx response id %d.\n", __func__,rx->id);
err = EINVAL;
goto next;
}
@ -1384,7 +1385,7 @@ next_skip_queue:
err = ENOENT;
printf("%s: cons %u frags %u rp %u, not enough frags\n",
__func__, *cons, frags, rp);
break;
break;
}
/*
* Note that m can be NULL, if rx->status < 0 or if
@ -1526,6 +1527,11 @@ xn_assemble_tx_request(struct netfront_info *sc, struct mbuf *m_head)
* tell the TCP stack to generate a shorter chain of packets.
*/
if (nfrags > MAX_TX_REQ_FRAGS) {
#ifdef DEBUG
printf("%s: nfrags %d > MAX_TX_REQ_FRAGS %d, netback "
"won't be able to handle it, dropping\n",
__func__, nfrags, MAX_TX_REQ_FRAGS);
#endif
m_freem(m_head);
return (EMSGSIZE);
}
@ -1881,11 +1887,11 @@ network_connect(struct netfront_info *np)
netif_rx_request_t *req;
u_int feature_rx_copy, feature_rx_flip;
error = xenbus_scanf(XBT_NIL, xenbus_get_otherend_path(np->xbdev),
error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev),
"feature-rx-copy", NULL, "%u", &feature_rx_copy);
if (error)
feature_rx_copy = 0;
error = xenbus_scanf(XBT_NIL, xenbus_get_otherend_path(np->xbdev),
error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev),
"feature-rx-flip", NULL, "%u", &feature_rx_flip);
if (error)
feature_rx_flip = 1;
@ -1999,14 +2005,14 @@ create_netdev(device_t dev)
/* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
for (i = 0; i <= NET_TX_RING_SIZE; i++) {
np->tx_mbufs[i] = (void *) ((u_long) i+1);
np->grant_tx_ref[i] = GRANT_INVALID_REF;
np->grant_tx_ref[i] = GRANT_REF_INVALID;
}
np->tx_mbufs[NET_TX_RING_SIZE] = (void *)0;
for (i = 0; i <= NET_RX_RING_SIZE; i++) {
np->rx_mbufs[i] = NULL;
np->grant_rx_ref[i] = GRANT_INVALID_REF;
np->grant_rx_ref[i] = GRANT_REF_INVALID;
}
/* A grant for every tx ring slot */
if (gnttab_alloc_grant_references(NET_TX_RING_SIZE,
@ -2128,8 +2134,8 @@ netif_disconnect_backend(struct netfront_info *info)
end_access(info->tx_ring_ref, info->tx.sring);
end_access(info->rx_ring_ref, info->rx.sring);
info->tx_ring_ref = GRANT_INVALID_REF;
info->rx_ring_ref = GRANT_INVALID_REF;
info->tx_ring_ref = GRANT_REF_INVALID;
info->rx_ring_ref = GRANT_REF_INVALID;
info->tx.sring = NULL;
info->rx.sring = NULL;
@ -2143,7 +2149,7 @@ netif_disconnect_backend(struct netfront_info *info)
static void
end_access(int ref, void *page)
{
if (ref != GRANT_INVALID_REF)
if (ref != GRANT_REF_INVALID)
gnttab_end_foreign_access(ref, page);
}
@ -2171,7 +2177,7 @@ static device_method_t netfront_methods[] = {
DEVMETHOD(device_resume, netfront_resume),
/* Xenbus interface */
DEVMETHOD(xenbus_backend_changed, netfront_backend_changed),
DEVMETHOD(xenbus_otherend_changed, netfront_backend_changed),
{ 0, 0 }
};
@ -2183,4 +2189,4 @@ static driver_t netfront_driver = {
};
devclass_t netfront_devclass;
DRIVER_MODULE(xe, xenbus, netfront_driver, netfront_devclass, 0, 0);
DRIVER_MODULE(xe, xenbusb_front, netfront_driver, netfront_devclass, 0, 0);
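talk_to_backend() above follows the standard XenStore transaction idiom: start a transaction, write the keys, commit, and retry the whole sequence when the commit fails with EAGAIN because another writer raced us. A stripped-down sketch (publish_frontend_keys is a hypothetical helper; xs_transaction_start, xs_printf and xs_transaction_end are used exactly as in the hunks):

static int
publish_frontend_keys(device_t dev, uint32_t tx_ring_ref)
{
	struct xs_transaction xst;
	const char *node = xenbus_get_node(dev);
	int err;

 again:
	err = xs_transaction_start(&xst);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		return (err);
	}
	err = xs_printf(xst, node, "tx-ring-ref", "%u", tx_ring_ref);
	if (err) {
		xs_transaction_end(xst, 1);	/* abort */
		xenbus_dev_fatal(dev, err, "writing tx ring-ref");
		return (err);
	}
	err = xs_transaction_end(xst, 0);	/* commit */
	if (err == EAGAIN)
		goto again;			/* lost the race; retry */
	return (err);
}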

View File

@ -181,6 +181,49 @@ bind_listening_port_to_irqhandler(unsigned int remote_domain,
return (0);
}
int
bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
unsigned int remote_port, const char *devname, driver_intr_t handler,
void *arg, unsigned long irqflags, unsigned int *irqp)
{
struct evtchn_bind_interdomain bind_interdomain;
unsigned int irq;
int error;
irq = alloc_xen_irq();
if (irq < 0)
return irq;
mtx_lock(&irq_evtchn[irq].lock);
bind_interdomain.remote_dom = remote_domain;
bind_interdomain.remote_port = remote_port;
error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
&bind_interdomain);
if (error) {
mtx_unlock(&irq_evtchn[irq].lock);
free_xen_irq(irq);
return (-error);
}
irq_evtchn[irq].handler = handler;
irq_evtchn[irq].arg = arg;
irq_evtchn[irq].evtchn = bind_interdomain.local_port;
irq_evtchn[irq].close = 1;
irq_evtchn[irq].mpsafe = (irqflags & INTR_MPSAFE) != 0;
evtchn_to_irq[bind_interdomain.local_port] = irq;
unmask_evtchn(bind_interdomain.local_port);
mtx_unlock(&irq_evtchn[irq].lock);
if (irqp)
*irqp = irq;
return (0);
}
int
bind_caller_port_to_irqhandler(unsigned int caller_port,
const char *devname, driver_intr_t handler, void *arg,

View File

@ -66,6 +66,7 @@ __FBSDID("$FreeBSD$");
char *hypercall_stubs;
shared_info_t *HYPERVISOR_shared_info;
static vm_paddr_t shared_info_pa;
static device_t nexus;
/*
* This is used to find our platform device instance.
@ -80,7 +81,7 @@ xenpci_cpuid_base(void)
{
uint32_t base, regs[4];
for (base = 0x40000000; base < 0x40001000; base += 0x100) {
for (base = 0x40000000; base < 0x40010000; base += 0x100) {
do_cpuid(base, regs);
if (!memcmp("XenVMMXenVMM", &regs[1], 12)
&& (regs[0] - base) >= 2)
@ -204,14 +205,21 @@ xenpci_allocate_resources(device_t dev)
scp->res_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ,
&scp->rid_irq, RF_SHAREABLE|RF_ACTIVE);
if (scp->res_irq == NULL)
if (scp->res_irq == NULL) {
printf("xenpci Could not allocate irq.\n");
goto errexit;
}
scp->rid_memory = PCIR_BAR(1);
scp->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
&scp->rid_memory, RF_ACTIVE);
if (scp->res_memory == NULL)
if (scp->res_memory == NULL) {
printf("xenpci Could not allocate memory bar.\n");
goto errexit;
}
scp->phys_next = rman_get_start(scp->res_memory);
return (0);
errexit:
@ -254,6 +262,36 @@ xenpci_alloc_space(size_t sz, vm_paddr_t *pa)
}
}
static struct resource *
xenpci_alloc_resource(device_t dev, device_t child, int type, int *rid,
u_long start, u_long end, u_long count, u_int flags)
{
return (BUS_ALLOC_RESOURCE(nexus, child, type, rid, start,
end, count, flags));
}
static int
xenpci_release_resource(device_t dev, device_t child, int type, int rid,
struct resource *r)
{
return (BUS_RELEASE_RESOURCE(nexus, child, type, rid, r));
}
static int
xenpci_activate_resource(device_t dev, device_t child, int type, int rid,
struct resource *r)
{
return (BUS_ACTIVATE_RESOURCE(nexus, child, type, rid, r));
}
static int
xenpci_deactivate_resource(device_t dev, device_t child, int type,
int rid, struct resource *r)
{
return (BUS_DEACTIVATE_RESOURCE(nexus, child, type, rid, r));
}
/*
* Called very early in the resume sequence - reinitialise the various
* bits of Xen machinery including the hypercall page and the shared
@ -303,20 +341,36 @@ xenpci_probe(device_t dev)
static int
xenpci_attach(device_t dev)
{
int error;
int error;
struct xenpci_softc *scp = device_get_softc(dev);
struct xen_add_to_physmap xatp;
vm_offset_t shared_va;
devclass_t dc;
/*
* Find and record nexus0. Since we are not really on the
* PCI bus, all resource operations are directed to nexus
* instead of through our parent.
*/
if ((dc = devclass_find("nexus")) == 0
|| (nexus = devclass_get_device(dc, 0)) == 0) {
device_printf(dev, "unable to find nexus.");
return (ENOENT);
}
error = xenpci_allocate_resources(dev);
if (error)
if (error) {
device_printf(dev, "xenpci_allocate_resources failed(%d).\n",
error);
goto errexit;
scp->phys_next = rman_get_start(scp->res_memory);
}
error = xenpci_init_hypercall_stubs(dev, scp);
if (error)
if (error) {
device_printf(dev, "xenpci_init_hypercall_stubs failed(%d).\n",
error);
goto errexit;
}
setup_xen_features();
@ -346,7 +400,7 @@ errexit:
* Undo anything we may have done.
*/
xenpci_deallocate_resources(dev);
return (error);
return (error);
}
/*
@ -364,8 +418,9 @@ xenpci_detach(device_t dev)
*/
if (scp->intr_cookie != NULL) {
if (BUS_TEARDOWN_INTR(parent, dev,
scp->res_irq, scp->intr_cookie) != 0)
printf("intr teardown failed.. continuing\n");
scp->res_irq, scp->intr_cookie) != 0)
device_printf(dev,
"intr teardown failed.. continuing\n");
scp->intr_cookie = NULL;
}
@ -386,6 +441,10 @@ static device_method_t xenpci_methods[] = {
/* Bus interface */
DEVMETHOD(bus_add_child, bus_generic_add_child),
DEVMETHOD(bus_alloc_resource, xenpci_alloc_resource),
DEVMETHOD(bus_release_resource, xenpci_release_resource),
DEVMETHOD(bus_activate_resource, xenpci_activate_resource),
DEVMETHOD(bus_deactivate_resource, xenpci_deactivate_resource),
{ 0, 0 }
};

View File

@ -1933,7 +1933,15 @@ again:
vn_lock(vp,
LK_EXCLUSIVE |
LK_RETRY);
r = VOP_LOOKUP(vp, &nvp, &cn);
if ((vp->v_vflag & VV_ROOT) != 0
&& (cn.cn_flags & ISDOTDOT)
!= 0) {
vref(vp);
nvp = vp;
r = 0;
} else
r = VOP_LOOKUP(vp, &nvp,
&cn);
}
}
if (!r) {

View File

@ -106,7 +106,7 @@ struct g_class g_eli_class = {
/*
* Code paths:
* BIO_READ:
* g_eli_start -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
* g_eli_start -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
* BIO_WRITE:
* g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
*/
@ -148,7 +148,7 @@ g_eli_crypto_rerun(struct cryptop *crp)
/*
* The function is called after reading encrypted data from the provider.
*
* g_eli_start -> g_io_request -> G_ELI_READ_DONE -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
* g_eli_start -> g_eli_crypto_read -> g_io_request -> G_ELI_READ_DONE -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
*/
void
g_eli_read_done(struct bio *bp)
@ -167,6 +167,7 @@ g_eli_read_done(struct bio *bp)
if (pbp->bio_inbed < pbp->bio_children)
return;
g_destroy_bio(bp);
sc = pbp->bio_to->geom->softc;
if (pbp->bio_error != 0) {
G_ELI_LOGREQ(0, pbp, "%s() failed", __func__);
pbp->bio_completed = 0;
@ -175,9 +176,9 @@ g_eli_read_done(struct bio *bp)
pbp->bio_driver2 = NULL;
}
g_io_deliver(pbp, pbp->bio_error);
atomic_subtract_int(&sc->sc_inflight, 1);
return;
}
sc = pbp->bio_to->geom->softc;
mtx_lock(&sc->sc_queue_mtx);
bioq_insert_tail(&sc->sc_queue, pbp);
mtx_unlock(&sc->sc_queue_mtx);
@ -192,6 +193,7 @@ g_eli_read_done(struct bio *bp)
void
g_eli_write_done(struct bio *bp)
{
struct g_eli_softc *sc;
struct bio *pbp;
G_ELI_LOGREQ(2, bp, "Request done.");
@ -218,7 +220,9 @@ g_eli_write_done(struct bio *bp)
* Write is finished, send it up.
*/
pbp->bio_completed = pbp->bio_length;
sc = pbp->bio_to->geom->softc;
g_io_deliver(pbp, pbp->bio_error);
atomic_subtract_int(&sc->sc_inflight, 1);
}
/*
@ -241,12 +245,14 @@ g_eli_orphan(struct g_consumer *cp)
sc = cp->geom->softc;
if (sc == NULL)
return;
g_eli_destroy(sc, 1);
g_eli_destroy(sc, TRUE);
}
/*
* BIO_READ : G_ELI_START -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
* BIO_WRITE: G_ELI_START -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
* BIO_READ:
* G_ELI_START -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
* BIO_WRITE:
* G_ELI_START -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
*/
static void
g_eli_start(struct bio *bp)
@ -282,24 +288,16 @@ g_eli_start(struct bio *bp)
g_io_deliver(bp, ENOMEM);
return;
}
bp->bio_driver1 = cbp;
bp->bio_pflags = G_ELI_NEW_BIO;
switch (bp->bio_cmd) {
case BIO_READ:
if (!(sc->sc_flags & G_ELI_FLAG_AUTH)) {
bp->bio_driver2 = NULL;
cbp->bio_done = g_eli_read_done;
cp = LIST_FIRST(&sc->sc_geom->consumer);
cbp->bio_to = cp->provider;
G_ELI_LOGREQ(2, cbp, "Sending request.");
/*
* Read encrypted data from provider.
*/
g_io_request(cbp, cp);
g_eli_crypto_read(sc, bp, 0);
break;
}
bp->bio_pflags = 255;
/* FALLTHROUGH */
case BIO_WRITE:
bp->bio_driver1 = cbp;
mtx_lock(&sc->sc_queue_mtx);
bioq_insert_tail(&sc->sc_queue, bp);
mtx_unlock(&sc->sc_queue_mtx);
@ -316,6 +314,104 @@ g_eli_start(struct bio *bp)
}
}
static int
g_eli_newsession(struct g_eli_worker *wr)
{
struct g_eli_softc *sc;
struct cryptoini crie, cria;
int error;
sc = wr->w_softc;
bzero(&crie, sizeof(crie));
crie.cri_alg = sc->sc_ealgo;
crie.cri_klen = sc->sc_ekeylen;
if (sc->sc_ealgo == CRYPTO_AES_XTS)
crie.cri_klen <<= 1;
crie.cri_key = sc->sc_ekeys[0];
if (sc->sc_flags & G_ELI_FLAG_AUTH) {
bzero(&cria, sizeof(cria));
cria.cri_alg = sc->sc_aalgo;
cria.cri_klen = sc->sc_akeylen;
cria.cri_key = sc->sc_akey;
crie.cri_next = &cria;
}
switch (sc->sc_crypto) {
case G_ELI_CRYPTO_SW:
error = crypto_newsession(&wr->w_sid, &crie,
CRYPTOCAP_F_SOFTWARE);
break;
case G_ELI_CRYPTO_HW:
error = crypto_newsession(&wr->w_sid, &crie,
CRYPTOCAP_F_HARDWARE);
break;
case G_ELI_CRYPTO_UNKNOWN:
error = crypto_newsession(&wr->w_sid, &crie,
CRYPTOCAP_F_HARDWARE);
if (error == 0) {
mtx_lock(&sc->sc_queue_mtx);
if (sc->sc_crypto == G_ELI_CRYPTO_UNKNOWN)
sc->sc_crypto = G_ELI_CRYPTO_HW;
mtx_unlock(&sc->sc_queue_mtx);
} else {
error = crypto_newsession(&wr->w_sid, &crie,
CRYPTOCAP_F_SOFTWARE);
mtx_lock(&sc->sc_queue_mtx);
if (sc->sc_crypto == G_ELI_CRYPTO_UNKNOWN)
sc->sc_crypto = G_ELI_CRYPTO_SW;
mtx_unlock(&sc->sc_queue_mtx);
}
break;
default:
panic("%s: invalid condition", __func__);
}
return (error);
}
static void
g_eli_freesession(struct g_eli_worker *wr)
{
crypto_freesession(wr->w_sid);
}
static void
g_eli_cancel(struct g_eli_softc *sc)
{
struct bio *bp;
mtx_assert(&sc->sc_queue_mtx, MA_OWNED);
while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) {
KASSERT(bp->bio_pflags == G_ELI_NEW_BIO,
("Not new bio when canceling (bp=%p).", bp));
g_io_deliver(bp, ENXIO);
}
}
static struct bio *
g_eli_takefirst(struct g_eli_softc *sc)
{
struct bio *bp;
mtx_assert(&sc->sc_queue_mtx, MA_OWNED);
if (!(sc->sc_flags & G_ELI_FLAG_SUSPEND))
return (bioq_takefirst(&sc->sc_queue));
/*
* Device suspended, so we skip new I/O requests.
*/
TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
if (bp->bio_pflags != G_ELI_NEW_BIO)
break;
}
if (bp != NULL)
bioq_remove(&sc->sc_queue, bp);
return (bp);
}
/*
* This is the main function for kernel worker thread when we don't have
* hardware acceleration and we have to do cryptography in software.
@ -328,6 +424,7 @@ g_eli_worker(void *arg)
struct g_eli_softc *sc;
struct g_eli_worker *wr;
struct bio *bp;
int error;
wr = arg;
sc = wr->w_softc;
@ -349,11 +446,13 @@ g_eli_worker(void *arg)
for (;;) {
mtx_lock(&sc->sc_queue_mtx);
bp = bioq_takefirst(&sc->sc_queue);
again:
bp = g_eli_takefirst(sc);
if (bp == NULL) {
if (sc->sc_flags & G_ELI_FLAG_DESTROY) {
g_eli_cancel(sc);
LIST_REMOVE(wr, w_next);
crypto_freesession(wr->w_sid);
g_eli_freesession(wr);
free(wr, M_ELI);
G_ELI_DEBUG(1, "Thread %s exiting.",
curthread->td_proc->p_comm);
@ -361,16 +460,63 @@ g_eli_worker(void *arg)
mtx_unlock(&sc->sc_queue_mtx);
kproc_exit(0);
}
while (sc->sc_flags & G_ELI_FLAG_SUSPEND) {
if (sc->sc_inflight > 0) {
G_ELI_DEBUG(0, "inflight=%d", sc->sc_inflight);
/*
* We still have inflight BIOs, so
* sleep and retry.
*/
msleep(sc, &sc->sc_queue_mtx, PRIBIO,
"geli:inf", hz / 5);
goto again;
}
/*
* Suspend requested, mark the worker as
* suspended and go to sleep.
*/
if (wr->w_active) {
g_eli_freesession(wr);
wr->w_active = FALSE;
}
wakeup(&sc->sc_workers);
msleep(sc, &sc->sc_queue_mtx, PRIBIO,
"geli:suspend", 0);
if (!wr->w_active &&
!(sc->sc_flags & G_ELI_FLAG_SUSPEND)) {
error = g_eli_newsession(wr);
KASSERT(error == 0,
("g_eli_newsession() failed on resume (error=%d)",
error));
wr->w_active = TRUE;
}
goto again;
}
msleep(sc, &sc->sc_queue_mtx, PDROP, "geli:w", 0);
continue;
}
if (bp->bio_pflags == G_ELI_NEW_BIO)
atomic_add_int(&sc->sc_inflight, 1);
mtx_unlock(&sc->sc_queue_mtx);
if (bp->bio_cmd == BIO_READ && bp->bio_pflags == 255)
g_eli_auth_read(sc, bp);
else if (sc->sc_flags & G_ELI_FLAG_AUTH)
g_eli_auth_run(wr, bp);
else
g_eli_crypto_run(wr, bp);
if (bp->bio_pflags == G_ELI_NEW_BIO) {
bp->bio_pflags = 0;
if (sc->sc_flags & G_ELI_FLAG_AUTH) {
if (bp->bio_cmd == BIO_READ)
g_eli_auth_read(sc, bp);
else
g_eli_auth_run(wr, bp);
} else {
if (bp->bio_cmd == BIO_READ)
g_eli_crypto_read(sc, bp, 1);
else
g_eli_crypto_run(wr, bp);
}
} else {
if (sc->sc_flags & G_ELI_FLAG_AUTH)
g_eli_auth_run(wr, bp);
else
g_eli_crypto_run(wr, bp);
}
}
}
@ -500,7 +646,7 @@ g_eli_last_close(struct g_eli_softc *sc)
gp = sc->sc_geom;
pp = LIST_FIRST(&gp->provider);
strlcpy(ppname, pp->name, sizeof(ppname));
error = g_eli_destroy(sc, 1);
error = g_eli_destroy(sc, TRUE);
KASSERT(error == 0, ("Cannot detach %s on last close (error=%d).",
ppname, error));
G_ELI_DEBUG(0, "Detached %s on last close.", ppname);
@ -557,7 +703,6 @@ g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp,
struct g_geom *gp;
struct g_provider *pp;
struct g_consumer *cp;
struct cryptoini crie, cria;
u_int i, threads;
int error;
@ -584,7 +729,8 @@ g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp,
else
gp->access = g_std_access;
sc->sc_crypto = G_ELI_CRYPTO_SW;
sc->sc_inflight = 0;
sc->sc_crypto = G_ELI_CRYPTO_UNKNOWN;
sc->sc_flags = md->md_flags;
/* Backward compatibility. */
if (md->md_version < 4)
@ -612,14 +758,6 @@ g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp,
sc->sc_bytes_per_sector =
(md->md_sectorsize - 1) / sc->sc_data_per_sector + 1;
sc->sc_bytes_per_sector *= bpp->sectorsize;
/*
* Precalculate SHA256 for HMAC key generation.
* This is an expensive operation that we can do once now or repeat
* for every access to a sector, so doing it now is much better.
*/
SHA256_Init(&sc->sc_akeyctx);
SHA256_Update(&sc->sc_akeyctx, sc->sc_akey,
sizeof(sc->sc_akey));
}
gp->softc = sc;
@ -679,7 +817,16 @@ g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp,
*/
g_eli_mkey_propagate(sc, mkey);
sc->sc_ekeylen = md->md_keylen;
if (sc->sc_flags & G_ELI_FLAG_AUTH) {
/*
* Precalculate SHA256 for HMAC key generation.
* This is an expensive operation that we can do once now or repeat
* for every access to a sector, so doing it now is much better.
*/
SHA256_Init(&sc->sc_akeyctx);
SHA256_Update(&sc->sc_akeyctx, sc->sc_akey,
sizeof(sc->sc_akey));
}
/*
* Precalculate SHA256 for IV generation.
* This is an expensive operation that we can do once now or repeat for
@ -697,20 +844,6 @@ g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp,
LIST_INIT(&sc->sc_workers);
bzero(&crie, sizeof(crie));
crie.cri_alg = sc->sc_ealgo;
crie.cri_klen = sc->sc_ekeylen;
if (sc->sc_ealgo == CRYPTO_AES_XTS)
crie.cri_klen <<= 1;
crie.cri_key = sc->sc_ekeys[0];
if (sc->sc_flags & G_ELI_FLAG_AUTH) {
bzero(&cria, sizeof(cria));
cria.cri_alg = sc->sc_aalgo;
cria.cri_klen = sc->sc_akeylen;
cria.cri_key = sc->sc_akey;
crie.cri_next = &cria;
}
threads = g_eli_threads;
if (threads == 0)
threads = mp_ncpus;
@ -728,21 +861,9 @@ g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp,
wr = malloc(sizeof(*wr), M_ELI, M_WAITOK | M_ZERO);
wr->w_softc = sc;
wr->w_number = i;
wr->w_active = TRUE;
/*
* If this is the first pass, try to get hardware support.
* Use software cryptography, if we cannot get it.
*/
if (LIST_EMPTY(&sc->sc_workers)) {
error = crypto_newsession(&wr->w_sid, &crie,
CRYPTOCAP_F_HARDWARE);
if (error == 0)
sc->sc_crypto = G_ELI_CRYPTO_HW;
}
if (sc->sc_crypto == G_ELI_CRYPTO_SW) {
error = crypto_newsession(&wr->w_sid, &crie,
CRYPTOCAP_F_SOFTWARE);
}
error = g_eli_newsession(wr);
if (error != 0) {
free(wr, M_ELI);
if (req != NULL) {
@ -758,7 +879,7 @@ g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp,
error = kproc_create(g_eli_worker, wr, &wr->w_proc, 0, 0,
"g_eli[%u] %s", i, bpp->name);
if (error != 0) {
crypto_freesession(wr->w_sid);
g_eli_freesession(wr);
free(wr, M_ELI);
if (req != NULL) {
gctl_error(req, "Cannot create kernel thread "
@ -875,7 +996,7 @@ g_eli_destroy_geom(struct gctl_req *req __unused,
struct g_eli_softc *sc;
sc = gp->softc;
return (g_eli_destroy(sc, 0));
return (g_eli_destroy(sc, FALSE));
}
static int
@ -1106,6 +1227,7 @@ g_eli_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
sbuf_printf(sb, name); \
} \
} while (0)
ADD_FLAG(G_ELI_FLAG_SUSPEND, "SUSPEND");
ADD_FLAG(G_ELI_FLAG_SINGLE_KEY, "SINGLE-KEY");
ADD_FLAG(G_ELI_FLAG_NATIVE_BYTE_ORDER, "NATIVE-BYTE-ORDER");
ADD_FLAG(G_ELI_FLAG_ONETIME, "ONETIME");
@ -1167,7 +1289,7 @@ g_eli_shutdown_pre_sync(void *arg, int howto)
pp = LIST_FIRST(&gp->provider);
KASSERT(pp != NULL, ("No provider? gp=%p (%s)", gp, gp->name));
if (pp->acr + pp->acw + pp->ace == 0)
error = g_eli_destroy(sc, 1);
error = g_eli_destroy(sc, TRUE);
else {
sc->sc_flags |= G_ELI_FLAG_RW_DETACH;
gp->access = g_eli_access;

View File

@ -86,6 +86,10 @@
#define G_ELI_FLAG_NATIVE_BYTE_ORDER 0x00040000
/* Provider uses single encryption key. */
#define G_ELI_FLAG_SINGLE_KEY 0x00080000
/* Device suspended. */
#define G_ELI_FLAG_SUSPEND 0x00100000
#define G_ELI_NEW_BIO 255
#define SHA512_MDLEN 64
#define G_ELI_AUTH_SECKEYLEN SHA256_DIGEST_LENGTH
@ -109,6 +113,7 @@ extern int g_eli_debug;
extern u_int g_eli_overwrites;
extern u_int g_eli_batch;
#define G_ELI_CRYPTO_UNKNOWN 0
#define G_ELI_CRYPTO_HW 1
#define G_ELI_CRYPTO_SW 2
@ -140,6 +145,7 @@ struct g_eli_worker {
struct proc *w_proc;
u_int w_number;
uint64_t w_sid;
boolean_t w_active;
LIST_ENTRY(g_eli_worker) w_next;
};
@ -160,6 +166,7 @@ struct g_eli_softc {
SHA256_CTX sc_ivctx;
int sc_nkey;
uint32_t sc_flags;
int sc_inflight;
off_t sc_mediasize;
size_t sc_sectorsize;
u_int sc_bytes_per_sector;
@ -499,6 +506,7 @@ uint8_t *g_eli_crypto_key(struct g_eli_softc *sc, off_t offset,
void g_eli_crypto_ivgen(struct g_eli_softc *sc, off_t offset, u_char *iv,
size_t size);
void g_eli_crypto_read(struct g_eli_softc *sc, struct bio *bp, boolean_t fromworker);
void g_eli_crypto_run(struct g_eli_worker *wr, struct bio *bp);
void g_eli_auth_read(struct g_eli_softc *sc, struct bio *bp);

View File

@ -217,7 +217,7 @@ g_eli_ctl_detach(struct gctl_req *req, struct g_class *mp)
sc->sc_flags |= G_ELI_FLAG_RW_DETACH;
sc->sc_geom->access = g_eli_access;
} else {
error = g_eli_destroy(sc, *force);
error = g_eli_destroy(sc, *force ? TRUE : FALSE);
if (error != 0) {
gctl_error(req,
"Cannot destroy device %s (error=%d).",
@ -699,6 +699,213 @@ g_eli_ctl_delkey(struct gctl_req *req, struct g_class *mp)
G_ELI_DEBUG(1, "Key %d removed from %s.", nkey, pp->name);
}
static int
g_eli_suspend_one(struct g_eli_softc *sc)
{
struct g_eli_worker *wr;
g_topology_assert();
if (sc == NULL)
return (ENOENT);
if (sc->sc_flags & G_ELI_FLAG_ONETIME)
return (EOPNOTSUPP);
mtx_lock(&sc->sc_queue_mtx);
if (sc->sc_flags & G_ELI_FLAG_SUSPEND) {
mtx_unlock(&sc->sc_queue_mtx);
return (EALREADY);
}
sc->sc_flags |= G_ELI_FLAG_SUSPEND;
wakeup(sc);
for (;;) {
LIST_FOREACH(wr, &sc->sc_workers, w_next) {
if (wr->w_active)
break;
}
if (wr == NULL)
break;
/* Not all threads suspended. */
msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO,
"geli:suspend", 0);
}
/*
* Clear sensitive data on suspend; they will be recovered on resume.
*/
bzero(sc->sc_mkey, sizeof(sc->sc_mkey));
bzero(sc->sc_ekeys,
sc->sc_nekeys * (sizeof(uint8_t *) + G_ELI_DATAKEYLEN));
free(sc->sc_ekeys, M_ELI);
sc->sc_ekeys = NULL;
bzero(sc->sc_akey, sizeof(sc->sc_akey));
bzero(&sc->sc_akeyctx, sizeof(sc->sc_akeyctx));
bzero(sc->sc_ivkey, sizeof(sc->sc_ivkey));
bzero(&sc->sc_ivctx, sizeof(sc->sc_ivctx));
mtx_unlock(&sc->sc_queue_mtx);
G_ELI_DEBUG(0, "%s has been suspended.", sc->sc_name);
return (0);
}
static void
g_eli_ctl_suspend(struct gctl_req *req, struct g_class *mp)
{
struct g_eli_softc *sc;
int *all, *nargs;
int error;
g_topology_assert();
nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
if (nargs == NULL) {
gctl_error(req, "No '%s' argument.", "nargs");
return;
}
all = gctl_get_paraml(req, "all", sizeof(*all));
if (all == NULL) {
gctl_error(req, "No '%s' argument.", "all");
return;
}
if (!*all && *nargs == 0) {
gctl_error(req, "Too few arguments.");
return;
}
if (*all) {
struct g_geom *gp, *gp2;
LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
sc = gp->softc;
if (sc->sc_flags & G_ELI_FLAG_ONETIME)
continue;
error = g_eli_suspend_one(sc);
if (error != 0)
gctl_error(req, "Not fully done.");
}
} else {
const char *prov;
char param[16];
int i;
for (i = 0; i < *nargs; i++) {
snprintf(param, sizeof(param), "arg%d", i);
prov = gctl_get_asciiparam(req, param);
if (prov == NULL) {
G_ELI_DEBUG(0, "No 'arg%d' argument.", i);
continue;
}
sc = g_eli_find_device(mp, prov);
if (sc == NULL) {
G_ELI_DEBUG(0, "No such provider: %s.", prov);
continue;
}
error = g_eli_suspend_one(sc);
if (error != 0)
gctl_error(req, "Not fully done.");
}
}
}
static void
g_eli_ctl_resume(struct gctl_req *req, struct g_class *mp)
{
struct g_eli_metadata md;
struct g_eli_softc *sc;
struct g_provider *pp;
struct g_consumer *cp;
const char *name;
u_char *key, mkey[G_ELI_DATAIVKEYLEN];
int *nargs, keysize, error;
u_int nkey;
g_topology_assert();
nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
if (nargs == NULL) {
gctl_error(req, "No '%s' argument.", "nargs");
return;
}
if (*nargs != 1) {
gctl_error(req, "Invalid number of arguments.");
return;
}
name = gctl_get_asciiparam(req, "arg0");
if (name == NULL) {
gctl_error(req, "No 'arg%u' argument.", 0);
return;
}
sc = g_eli_find_device(mp, name);
if (sc == NULL) {
gctl_error(req, "Provider %s is invalid.", name);
return;
}
if (!(sc->sc_flags & G_ELI_FLAG_SUSPEND)) {
gctl_error(req, "Provider %s not suspended.", name);
return;
}
cp = LIST_FIRST(&sc->sc_geom->consumer);
pp = cp->provider;
error = g_eli_read_metadata(mp, pp, &md);
if (error != 0) {
gctl_error(req, "Cannot read metadata from %s (error=%d).",
name, error);
return;
}
if (md.md_keys == 0x00) {
bzero(&md, sizeof(md));
gctl_error(req, "No valid keys on %s.", pp->name);
return;
}
key = gctl_get_param(req, "key", &keysize);
if (key == NULL || keysize != G_ELI_USERKEYLEN) {
bzero(&md, sizeof(md));
gctl_error(req, "No '%s' argument.", "key");
return;
}
error = g_eli_mkey_decrypt(&md, key, mkey, &nkey);
bzero(key, keysize);
if (error == -1) {
bzero(&md, sizeof(md));
gctl_error(req, "Wrong key for %s.", pp->name);
return;
} else if (error > 0) {
bzero(&md, sizeof(md));
gctl_error(req, "Cannot decrypt Master Key for %s (error=%d).",
pp->name, error);
return;
}
G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name);
mtx_lock(&sc->sc_queue_mtx);
/* Restore sc_mkey, sc_ekeys, sc_akey and sc_ivkey. */
g_eli_mkey_propagate(sc, mkey);
bzero(mkey, sizeof(mkey));
bzero(&md, sizeof(md));
/* Restore sc_akeyctx. */
if (sc->sc_flags & G_ELI_FLAG_AUTH) {
SHA256_Init(&sc->sc_akeyctx);
SHA256_Update(&sc->sc_akeyctx, sc->sc_akey,
sizeof(sc->sc_akey));
}
/* Restore sc_ivctx. */
switch (sc->sc_ealgo) {
case CRYPTO_AES_XTS:
break;
default:
SHA256_Init(&sc->sc_ivctx);
SHA256_Update(&sc->sc_ivctx, sc->sc_ivkey,
sizeof(sc->sc_ivkey));
break;
}
sc->sc_flags &= ~G_ELI_FLAG_SUSPEND;
mtx_unlock(&sc->sc_queue_mtx);
G_ELI_DEBUG(1, "Resumed %s.", pp->name);
wakeup(sc);
}
static int
g_eli_kill_one(struct g_eli_softc *sc)
{
@ -749,7 +956,7 @@ g_eli_kill_one(struct g_eli_softc *sc)
}
if (error == 0)
G_ELI_DEBUG(0, "%s has been killed.", pp->name);
g_eli_destroy(sc, 1);
g_eli_destroy(sc, TRUE);
return (error);
}
@ -839,6 +1046,10 @@ g_eli_config(struct gctl_req *req, struct g_class *mp, const char *verb)
g_eli_ctl_setkey(req, mp);
else if (strcmp(verb, "delkey") == 0)
g_eli_ctl_delkey(req, mp);
else if (strcmp(verb, "suspend") == 0)
g_eli_ctl_suspend(req, mp);
else if (strcmp(verb, "resume") == 0)
g_eli_ctl_resume(req, mp);
else if (strcmp(verb, "kill") == 0)
g_eli_ctl_kill(req, mp);
else

View File

@ -129,6 +129,7 @@ g_eli_auth_keygen(struct g_eli_softc *sc, off_t offset, u_char *key)
static int
g_eli_auth_read_done(struct cryptop *crp)
{
struct g_eli_softc *sc;
struct bio *bp;
if (crp->crp_etype == EAGAIN) {
@ -152,8 +153,8 @@ g_eli_auth_read_done(struct cryptop *crp)
*/
if (bp->bio_inbed < bp->bio_children)
return (0);
sc = bp->bio_to->geom->softc;
if (bp->bio_error == 0) {
struct g_eli_softc *sc;
u_int i, lsec, nsec, data_secsize, decr_secsize, encr_secsize;
u_char *srcdata, *dstdata, *auth;
off_t coroff, corsize;
@ -161,7 +162,6 @@ g_eli_auth_read_done(struct cryptop *crp)
/*
* Verify data integrity based on calculated and read HMACs.
*/
sc = bp->bio_to->geom->softc;
/* Sectorsize of decrypted provider eg. 4096. */
decr_secsize = bp->bio_to->sectorsize;
/* The real sectorsize of encrypted provider, eg. 512. */
@ -240,6 +240,7 @@ g_eli_auth_read_done(struct cryptop *crp)
* Read is finished, send it up.
*/
g_io_deliver(bp, bp->bio_error);
atomic_subtract_int(&sc->sc_inflight, 1);
return (0);
}
@ -276,6 +277,7 @@ g_eli_auth_write_done(struct cryptop *crp)
*/
if (bp->bio_inbed < bp->bio_children)
return (0);
sc = bp->bio_to->geom->softc;
if (bp->bio_error != 0) {
G_ELI_LOGREQ(0, bp, "Crypto WRITE request failed (error=%d).",
bp->bio_error);
@ -285,9 +287,9 @@ g_eli_auth_write_done(struct cryptop *crp)
bp->bio_driver1 = NULL;
g_destroy_bio(cbp);
g_io_deliver(bp, bp->bio_error);
atomic_subtract_int(&sc->sc_inflight, 1);
return (0);
}
sc = bp->bio_to->geom->softc;
cp = LIST_FIRST(&sc->sc_geom->consumer);
cbp = bp->bio_driver1;
bp->bio_driver1 = NULL;
@ -392,6 +394,11 @@ g_eli_auth_read(struct g_eli_softc *sc, struct bio *bp)
/*
* This is the main function responsible for cryptography (ie. communication
* with crypto(9) subsystem).
*
* BIO_READ:
* g_eli_start -> g_eli_auth_read -> g_io_request -> g_eli_read_done -> G_ELI_AUTH_RUN -> g_eli_auth_read_done -> g_io_deliver
* BIO_WRITE:
* g_eli_start -> G_ELI_AUTH_RUN -> g_eli_auth_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
*/
void
g_eli_auth_run(struct g_eli_worker *wr, struct bio *bp)

View File

@ -53,7 +53,7 @@ __FBSDID("$FreeBSD$");
/*
* Code paths:
* BIO_READ:
* g_eli_start -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
* g_eli_start -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
* BIO_WRITE:
* g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
*/
@ -63,11 +63,12 @@ MALLOC_DECLARE(M_ELI);
/*
* The function is called after we read and decrypt data.
*
* g_eli_start -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> G_ELI_CRYPTO_READ_DONE -> g_io_deliver
* g_eli_start -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> G_ELI_CRYPTO_READ_DONE -> g_io_deliver
*/
static int
g_eli_crypto_read_done(struct cryptop *crp)
{
struct g_eli_softc *sc;
struct bio *bp;
if (crp->crp_etype == EAGAIN) {
@ -101,7 +102,9 @@ g_eli_crypto_read_done(struct cryptop *crp)
/*
* Read is finished, send it up.
*/
sc = bp->bio_to->geom->softc;
g_io_deliver(bp, bp->bio_error);
atomic_subtract_int(&sc->sc_inflight, 1);
return (0);
}
@ -113,6 +116,7 @@ g_eli_crypto_read_done(struct cryptop *crp)
static int
g_eli_crypto_write_done(struct cryptop *crp)
{
struct g_eli_softc *sc;
struct g_geom *gp;
struct g_consumer *cp;
struct bio *bp, *cbp;
@ -141,18 +145,20 @@ g_eli_crypto_write_done(struct cryptop *crp)
bp->bio_children = 1;
cbp = bp->bio_driver1;
bp->bio_driver1 = NULL;
gp = bp->bio_to->geom;
if (bp->bio_error != 0) {
G_ELI_LOGREQ(0, bp, "Crypto WRITE request failed (error=%d).",
bp->bio_error);
free(bp->bio_driver2, M_ELI);
bp->bio_driver2 = NULL;
g_destroy_bio(cbp);
sc = gp->softc;
g_io_deliver(bp, bp->bio_error);
atomic_subtract_int(&sc->sc_inflight, 1);
return (0);
}
cbp->bio_data = bp->bio_driver2;
cbp->bio_done = g_eli_write_done;
gp = bp->bio_to->geom;
cp = LIST_FIRST(&gp->consumer);
cbp->bio_to = cp->provider;
G_ELI_LOGREQ(2, cbp, "Sending request.");
@ -163,9 +169,58 @@ g_eli_crypto_write_done(struct cryptop *crp)
return (0);
}
/*
* The function is called to read encrypted data.
*
* g_eli_start -> G_ELI_CRYPTO_READ -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
*/
void
g_eli_crypto_read(struct g_eli_softc *sc, struct bio *bp, boolean_t fromworker)
{
struct g_consumer *cp;
struct bio *cbp;
if (!fromworker) {
/*
* We are not called from the worker thread, so check if
* device is suspended.
*/
mtx_lock(&sc->sc_queue_mtx);
if (sc->sc_flags & G_ELI_FLAG_SUSPEND) {
/*
* If device is suspended, we place the request onto
* the queue, so it can be handled after resume.
*/
G_ELI_DEBUG(0, "device suspended, move onto queue");
bioq_insert_tail(&sc->sc_queue, bp);
mtx_unlock(&sc->sc_queue_mtx);
wakeup(sc);
return;
}
atomic_add_int(&sc->sc_inflight, 1);
mtx_unlock(&sc->sc_queue_mtx);
}
bp->bio_pflags = 0;
bp->bio_driver2 = NULL;
cbp = bp->bio_driver1;
cbp->bio_done = g_eli_read_done;
cp = LIST_FIRST(&sc->sc_geom->consumer);
cbp->bio_to = cp->provider;
G_ELI_LOGREQ(2, cbp, "Sending request.");
/*
* Read encrypted data from provider.
*/
g_io_request(cbp, cp);
}
/*
* This is the main function responsible for cryptography (ie. communication
* with crypto(9) subsystem).
*
* BIO_READ:
* g_eli_start -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> G_ELI_CRYPTO_RUN -> g_eli_crypto_read_done -> g_io_deliver
* BIO_WRITE:
* g_eli_start -> G_ELI_CRYPTO_RUN -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
*/
void
g_eli_crypto_run(struct g_eli_worker *wr, struct bio *bp)

View File

@ -722,7 +722,9 @@ char *bootmem_start, *bootmem_current, *bootmem_end;
pteinfo_t *pteinfo_list;
void initvalues(start_info_t *startinfo);
struct ringbuf_head *xen_store; /* XXX move me */
struct xenstore_domain_interface;
extern struct xenstore_domain_interface *xen_store;
char *console_page;
void *
@ -1082,7 +1084,7 @@ initvalues(start_info_t *startinfo)
HYPERVISOR_shared_info = (shared_info_t *)cur_space;
cur_space += PAGE_SIZE;
xen_store = (struct ringbuf_head *)cur_space;
xen_store = (struct xenstore_domain_interface *)cur_space;
cur_space += PAGE_SIZE;
console_page = (char *)cur_space;

View File

@ -655,16 +655,8 @@ interpret:
setsugid(p);
#ifdef KTRACE
if (p->p_tracevp != NULL &&
priv_check_cred(oldcred, PRIV_DEBUG_DIFFCRED, 0)) {
mtx_lock(&ktrace_mtx);
p->p_traceflag = 0;
tracevp = p->p_tracevp;
p->p_tracevp = NULL;
tracecred = p->p_tracecred;
p->p_tracecred = NULL;
mtx_unlock(&ktrace_mtx);
}
if (priv_check_cred(oldcred, PRIV_DEBUG_DIFFCRED, 0))
ktrprocexec(p, &tracecred, &tracevp);
#endif
/*
* Close any file descriptors 0..2 that reference procfs,

View File

@ -121,10 +121,6 @@ exit1(struct thread *td, int rv)
struct proc *p, *nq, *q;
struct vnode *vtmp;
struct vnode *ttyvp = NULL;
#ifdef KTRACE
struct vnode *tracevp;
struct ucred *tracecred;
#endif
struct plimit *plim;
int locked;
@ -356,33 +352,7 @@ exit1(struct thread *td, int rv)
if (ttyvp != NULL)
vrele(ttyvp);
#ifdef KTRACE
/*
* Disable tracing, then drain any pending records and release
* the trace file.
*/
if (p->p_traceflag != 0) {
PROC_LOCK(p);
mtx_lock(&ktrace_mtx);
p->p_traceflag = 0;
mtx_unlock(&ktrace_mtx);
PROC_UNLOCK(p);
ktrprocexit(td);
PROC_LOCK(p);
mtx_lock(&ktrace_mtx);
tracevp = p->p_tracevp;
p->p_tracevp = NULL;
tracecred = p->p_tracecred;
p->p_tracecred = NULL;
mtx_unlock(&ktrace_mtx);
PROC_UNLOCK(p);
if (tracevp != NULL) {
locked = VFS_LOCK_GIANT(tracevp->v_mount);
vrele(tracevp);
VFS_UNLOCK_GIANT(locked);
}
if (tracecred != NULL)
crfree(tracecred);
}
ktrprocexit(td);
#endif
/*
* Release reference to text vnode

View File

@ -645,21 +645,7 @@ again:
callout_init(&p2->p_itcallout, CALLOUT_MPSAFE);
#ifdef KTRACE
/*
* Copy traceflag and tracefile if enabled.
*/
mtx_lock(&ktrace_mtx);
KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode"));
if (p1->p_traceflag & KTRFAC_INHERIT) {
p2->p_traceflag = p1->p_traceflag;
if ((p2->p_tracevp = p1->p_tracevp) != NULL) {
VREF(p2->p_tracevp);
KASSERT(p1->p_tracecred != NULL,
("ktrace vnode with no cred"));
p2->p_tracecred = crhold(p1->p_tracecred);
}
}
mtx_unlock(&ktrace_mtx);
ktrprocfork(p1, p2);
#endif
/*

View File

@ -126,7 +126,7 @@ SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize,
0, "Maximum size of genio event payload");
static int print_message = 1;
struct mtx ktrace_mtx;
static struct mtx ktrace_mtx;
static struct sx ktrace_sx;
static void ktrace_init(void *dummy);
@ -134,7 +134,10 @@ static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
static u_int ktrace_resize_pool(u_int newsize);
static struct ktr_request *ktr_getrequest(int type);
static void ktr_submitrequest(struct thread *td, struct ktr_request *req);
static void ktr_freeproc(struct proc *p, struct ucred **uc,
struct vnode **vp);
static void ktr_freerequest(struct ktr_request *req);
static void ktr_freerequest_locked(struct ktr_request *req);
static void ktr_writerequest(struct thread *td, struct ktr_request *req);
static int ktrcanset(struct thread *,struct proc *);
static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
@ -375,11 +378,43 @@ static void
ktr_freerequest(struct ktr_request *req)
{
mtx_lock(&ktrace_mtx);
ktr_freerequest_locked(req);
mtx_unlock(&ktrace_mtx);
}
static void
ktr_freerequest_locked(struct ktr_request *req)
{
mtx_assert(&ktrace_mtx, MA_OWNED);
if (req->ktr_buffer != NULL)
free(req->ktr_buffer, M_KTRACE);
mtx_lock(&ktrace_mtx);
STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
mtx_unlock(&ktrace_mtx);
}
/*
* Disable tracing for a process and release all associated resources.
* The caller is responsible for releasing a reference on the returned
* vnode and credentials.
*/
static void
ktr_freeproc(struct proc *p, struct ucred **uc, struct vnode **vp)
{
struct ktr_request *req;
PROC_LOCK_ASSERT(p, MA_OWNED);
mtx_assert(&ktrace_mtx, MA_OWNED);
*uc = p->p_tracecred;
p->p_tracecred = NULL;
if (vp != NULL)
*vp = p->p_tracevp;
p->p_tracevp = NULL;
p->p_traceflag = 0;
while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) {
STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list);
ktr_freerequest_locked(req);
}
}
void
@ -432,19 +467,78 @@ ktrsysret(code, error, retval)
}
/*
* When a process exits, drain per-process asynchronous trace records.
* When a setuid process execs, disable tracing.
*
* XXX: We toss any pending asynchronous records.
*/
void
ktrprocexec(struct proc *p, struct ucred **uc, struct vnode **vp)
{
PROC_LOCK_ASSERT(p, MA_OWNED);
mtx_lock(&ktrace_mtx);
ktr_freeproc(p, uc, vp);
mtx_unlock(&ktrace_mtx);
}
/*
* When a process exits, drain per-process asynchronous trace records
* and disable tracing.
*/
void
ktrprocexit(struct thread *td)
{
struct proc *p;
struct ucred *cred;
struct vnode *vp;
int vfslocked;
p = td->td_proc;
if (p->p_traceflag == 0)
return;
ktrace_enter(td);
sx_xlock(&ktrace_sx);
ktr_drain(td);
sx_xunlock(&ktrace_sx);
PROC_LOCK(p);
mtx_lock(&ktrace_mtx);
ktr_freeproc(p, &cred, &vp);
mtx_unlock(&ktrace_mtx);
PROC_UNLOCK(p);
if (vp != NULL) {
vfslocked = VFS_LOCK_GIANT(vp->v_mount);
vrele(vp);
VFS_UNLOCK_GIANT(vfslocked);
}
if (cred != NULL)
crfree(cred);
ktrace_exit(td);
}
/*
* When a process forks, enable tracing in the new process if needed.
*/
void
ktrprocfork(struct proc *p1, struct proc *p2)
{
PROC_LOCK_ASSERT(p1, MA_OWNED);
PROC_LOCK_ASSERT(p2, MA_OWNED);
mtx_lock(&ktrace_mtx);
KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode"));
if (p1->p_traceflag & KTRFAC_INHERIT) {
p2->p_traceflag = p1->p_traceflag;
if ((p2->p_tracevp = p1->p_tracevp) != NULL) {
VREF(p2->p_tracevp);
KASSERT(p1->p_tracecred != NULL,
("ktrace vnode with no cred"));
p2->p_tracecred = crhold(p1->p_tracecred);
}
}
mtx_unlock(&ktrace_mtx);
}
/*
* When a thread returns, drain any asynchronous records generated by the
* system call.
@ -694,10 +788,7 @@ ktrace(td, uap)
if (p->p_tracevp == vp) {
if (ktrcanset(td, p)) {
mtx_lock(&ktrace_mtx);
cred = p->p_tracecred;
p->p_tracecred = NULL;
p->p_tracevp = NULL;
p->p_traceflag = 0;
ktr_freeproc(p, &cred, NULL);
mtx_unlock(&ktrace_mtx);
vrele_count++;
crfree(cred);
@ -864,14 +955,9 @@ ktrops(td, p, ops, facs, vp)
p->p_traceflag |= KTRFAC_ROOT;
} else {
/* KTROP_CLEAR */
if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0)
/* no more tracing */
p->p_traceflag = 0;
tracevp = p->p_tracevp;
p->p_tracevp = NULL;
tracecred = p->p_tracecred;
p->p_tracecred = NULL;
}
ktr_freeproc(p, &tracecred, &tracevp);
}
mtx_unlock(&ktrace_mtx);
PROC_UNLOCK(p);
@ -1036,10 +1122,7 @@ ktr_writerequest(struct thread *td, struct ktr_request *req)
PROC_LOCK(p);
if (p->p_tracevp == vp) {
mtx_lock(&ktrace_mtx);
p->p_tracevp = NULL;
p->p_traceflag = 0;
cred = p->p_tracecred;
p->p_tracecred = NULL;
ktr_freeproc(p, &cred, NULL);
mtx_unlock(&ktrace_mtx);
vrele_count++;
}
@ -1051,11 +1134,6 @@ ktr_writerequest(struct thread *td, struct ktr_request *req)
}
sx_sunlock(&allproc_lock);
/*
* We can't clear any pending requests in threads that have cached
* them but not yet committed them, as those are per-thread. The
* thread will have to clear it itself on system call return.
*/
vfslocked = VFS_LOCK_GIANT(vp->v_mount);
while (vrele_count-- > 0)
vrele(vp);

View File

@ -181,13 +181,12 @@ syscall_module_handler(struct module *mod, int what, void *arg)
error = syscall_deregister(data->offset, &data->old_sysent);
return (error);
default:
return EOPNOTSUPP;
if (data->chainevh)
return (data->chainevh(mod, what, data->chainarg));
return (EOPNOTSUPP);
}
if (data->chainevh)
return (data->chainevh(mod, what, data->chainarg));
else
return (0);
/* NOTREACHED */
}
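The restructured default case above means a module's chained event handler
now receives events other than MOD_LOAD and MOD_UNLOAD (e.g. MOD_QUIESCE).
A minimal sketch of a loadable syscall relying on this, modeled on the
standard SYSCALL_MODULE pattern; hello, hello_evh, and offset are
hypothetical names, not part of this commit:

#include <sys/types.h>
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/module.h>
#include <sys/sysent.h>
#include <sys/kernel.h>
#include <sys/systm.h>

static int
hello(struct thread *td, void *arg)
{

	printf("hello from a loadable syscall\n");
	return (0);
}

static struct sysent hello_sysent = {
	.sy_narg = 0,
	.sy_call = (sy_call_t *)hello,
};

static int offset = NO_SYSCALL;

/* Chained handler: now reached for every event the switch does not consume. */
static int
hello_evh(module_t m, int what, void *arg)
{

	return (0);
}

SYSCALL_MODULE(hello, &offset, &hello_sysent, hello_evh, NULL);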
int

View File

@ -45,7 +45,6 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/signalvar.h>
#include <sys/sx.h>
#include <sys/ucontext.h>
#include <sys/thr.h>
#include <sys/rtprio.h>
@ -431,40 +430,40 @@ thr_suspend(struct thread *td, struct thr_suspend_args *uap)
int
kern_thr_suspend(struct thread *td, struct timespec *tsp)
{
struct proc *p = td->td_proc;
struct timeval tv;
int error = 0;
int timo = 0;
if (tsp != NULL) {
if (tsp->tv_nsec < 0 || tsp->tv_nsec > 1000000000)
return (EINVAL);
}
if (td->td_pflags & TDP_WAKEUP) {
td->td_pflags &= ~TDP_WAKEUP;
return (0);
}
PROC_LOCK(td->td_proc);
if ((td->td_flags & TDF_THRWAKEUP) == 0) {
if (tsp != NULL) {
if (tsp->tv_nsec < 0 || tsp->tv_nsec > 1000000000)
return (EINVAL);
if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
error = EWOULDBLOCK;
else {
TIMESPEC_TO_TIMEVAL(&tv, tsp);
timo = tvtohz(&tv);
error = msleep((void *)td, &td->td_proc->p_mtx,
PCATCH, "lthr", timo);
}
}
PROC_LOCK(p);
if (error == 0 && (td->td_flags & TDF_THRWAKEUP) == 0)
error = msleep((void *)td, &p->p_mtx,
PCATCH, "lthr", timo);
if (td->td_flags & TDF_THRWAKEUP) {
thread_lock(td);
td->td_flags &= ~TDF_THRWAKEUP;
thread_unlock(td);
PROC_UNLOCK(td->td_proc);
PROC_UNLOCK(p);
return (0);
}
PROC_UNLOCK(td->td_proc);
PROC_UNLOCK(p);
if (error == EWOULDBLOCK)
error = ETIMEDOUT;
else if (error == ERESTART) {

View File

@ -283,14 +283,14 @@ xls_board_specific_overrides(struct xlr_board_info* board)
break;
case RMI_XLR_BOARD_ARIZONA_VIII:
if (blk1->enabled) {
if (blk1->enabled) {
/* There is just one Octal PHY on the board and it is
* connected to the MII interface for NA Quad 0. */
blk1->gmac_port[0].mii_addr = XLR_IO_GMAC_0_OFFSET;
blk1->gmac_port[1].mii_addr = XLR_IO_GMAC_0_OFFSET;
blk1->gmac_port[2].mii_addr = XLR_IO_GMAC_0_OFFSET;
blk1->gmac_port[3].mii_addr = XLR_IO_GMAC_0_OFFSET;
for (i = 0; i < 4; i++) {
blk1->gmac_port[i].mii_addr =
XLR_IO_GMAC_0_OFFSET;
blk1->gmac_port[i].mdint_id = 0;
}
}
break;

View File

@ -861,7 +861,7 @@ nlge_mii_read(struct device *dev, int phyaddr, int regidx)
int val;
sc = device_get_softc(dev);
val = (sc->port_type != XLR_XGMII) ? (0xffff) :
val = (sc->port_type == XLR_XGMII) ? (0xffff) :
nlge_mii_read_internal(sc->mii_base, phyaddr, regidx);
return (val);

View File

@ -167,6 +167,14 @@ xlr_parse_mmu_options(void)
*/
xlr_ncores = 1;
cpu_map = xlr_boot1_info.cpu_online_map;
#ifndef SMP /* Uniprocessor! */
if (cpu_map != 0x1) {
printf("WARNING: Starting uniprocessor kernel on cpumask [0x%lx]!\n"
"WARNING: Other CPUs will be unused.\n", (u_long)cpu_map);
cpu_map = 0x1;
}
#endif
core0_thr_mask = cpu_map & 0xf;
switch (core0_thr_mask) {
case 1:
@ -188,9 +196,9 @@ xlr_parse_mmu_options(void)
xlr_ncores++;
}
}
xlr_hw_thread_mask = cpu_map;
/* setup hardware processor id to cpu id mapping */
xlr_hw_thread_mask = xlr_boot1_info.cpu_online_map;
for (i = 0; i< MAXCPU; i++)
xlr_cpuid_to_hwtid[i] =
xlr_hwtid_to_cpuid [i] = -1;

View File

@ -92,6 +92,11 @@
#include <security/mac/mac_framework.h>
#ifdef COMPAT_FREEBSD32
#include <sys/mount.h>
#include <compat/freebsd32/freebsd32.h>
#endif
struct ifindex_entry {
struct ifnet *ife_ifnet;
};
@ -2402,6 +2407,17 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
return (error);
}
#ifdef COMPAT_FREEBSD32
struct ifconf32 {
int32_t ifc_len;
union {
uint32_t ifcu_buf;
uint32_t ifcu_req;
} ifc_ifcu;
};
#define SIOCGIFCONF32 _IOWR('i', 36, struct ifconf32)
#endif
/*
* Interface ioctls.
*/
@ -2416,10 +2432,21 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
switch (cmd) {
case SIOCGIFCONF:
case OSIOCGIFCONF:
#ifdef __amd64__
case SIOCGIFCONF32:
#endif
return (ifconf(cmd, data));
#ifdef COMPAT_FREEBSD32
case SIOCGIFCONF32:
{
struct ifconf32 *ifc32;
struct ifconf ifc;
ifc32 = (struct ifconf32 *)data;
ifc.ifc_len = ifc32->ifc_len;
ifc.ifc_buf = PTRIN(ifc32->ifc_buf);
return (ifconf(SIOCGIFCONF, (void *)&ifc));
}
#endif
}
ifr = (struct ifreq *)data;
@ -2646,23 +2673,12 @@ static int
ifconf(u_long cmd, caddr_t data)
{
struct ifconf *ifc = (struct ifconf *)data;
#ifdef __amd64__
struct ifconf32 *ifc32 = (struct ifconf32 *)data;
struct ifconf ifc_swab;
#endif
struct ifnet *ifp;
struct ifaddr *ifa;
struct ifreq ifr;
struct sbuf *sb;
int error, full = 0, valid_len, max_len;
#ifdef __amd64__
if (cmd == SIOCGIFCONF32) {
ifc_swab.ifc_len = ifc32->ifc_len;
ifc_swab.ifc_buf = (caddr_t)(uintptr_t)ifc32->ifc_buf;
ifc = &ifc_swab;
}
#endif
/* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
max_len = MAXPHYS - 1;
@ -2752,10 +2768,6 @@ again:
}
ifc->ifc_len = valid_len;
#ifdef __amd64__
if (cmd == SIOCGIFCONF32)
ifc32->ifc_len = valid_len;
#endif
sbuf_finish(sb);
error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
sbuf_delete(sb);
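With the thunk above, a 32-bit binary on an amd64 kernel needs no special
casing: it issues the ordinary SIOCGIFCONF, which the kernel sees as
SIOCGIFCONF32 and converts before calling ifconf().  A hedged userland
sketch (get_iflist is an illustrative helper, not from this commit):

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <string.h>
#include <unistd.h>

int
get_iflist(char *buf, int len)
{
	struct ifconf ifc;
	int s;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
		return (-1);
	memset(&ifc, 0, sizeof(ifc));
	ifc.ifc_len = len;
	ifc.ifc_buf = buf;
	if (ioctl(s, SIOCGIFCONF, &ifc) < 0) {
		close(s);
		return (-1);
	}
	close(s);
	return (ifc.ifc_len);	/* bytes of struct ifreq actually filled in */
}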

View File

@ -391,16 +391,6 @@ struct ifconf {
#define ifc_req ifc_ifcu.ifcu_req /* array of structures returned */
};
#if defined (__amd64__)
struct ifconf32 {
int ifc_len; /* size of associated buffer */
union {
u_int ifcu_buf;
u_int ifcu_req;
} ifc_ifcu;
};
#endif
/*
* interface groups
*/

View File

@ -191,8 +191,6 @@ struct stat;
#define KTRFAC_DROP 0x20000000 /* last event was dropped */
#ifdef _KERNEL
extern struct mtx ktrace_mtx;
void ktrnamei(char *);
void ktrcsw(int, int);
void ktrpsig(int, sig_t, sigset_t *, int);
@ -200,7 +198,9 @@ void ktrgenio(int, enum uio_rw, struct uio *, int);
void ktrsyscall(int, int narg, register_t args[]);
void ktrsysctl(int *name, u_int namelen);
void ktrsysret(int, int, register_t);
void ktrprocexec(struct proc *, struct ucred **, struct vnode **);
void ktrprocexit(struct thread *);
void ktrprocfork(struct proc *, struct proc *);
void ktruserret(struct thread *);
void ktrstruct(const char *, void *, size_t);
#define ktrsockaddr(s) \

View File

@ -62,9 +62,6 @@
#define SIOCSIFBRDADDR _IOW('i', 19, struct ifreq) /* set broadcast addr */
#define OSIOCGIFCONF _IOWR('i', 20, struct ifconf) /* get ifnet list */
#define SIOCGIFCONF _IOWR('i', 36, struct ifconf) /* get ifnet list */
#if defined (__amd64__)
#define SIOCGIFCONF32 _IOWR('i', 36, struct ifconf32) /* get ifnet list */
#endif
#define OSIOCGIFNETMASK _IOWR('i', 21, struct ifreq) /* get net addr mask */
#define SIOCGIFNETMASK _IOWR('i', 37, struct ifreq) /* get net addr mask */
#define SIOCSIFNETMASK _IOW('i', 22, struct ifreq) /* set net addr mask */

View File

@ -1460,8 +1460,8 @@ swap_pager_putpages(vm_object_t object, vm_page_t *m, int count,
* Completion routine for asynchronous reads and writes from/to swap.
* Also called manually by synchronous code to finish up a bp.
*
* For READ operations, the pages are PG_BUSY'd. For WRITE operations,
* the pages are vm_page_t->busy'd. For READ operations, we PG_BUSY
* For READ operations, the pages are VPO_BUSY'd. For WRITE operations,
* the pages are vm_page_t->busy'd. For READ operations, we VPO_BUSY
* unbusy all pages except the 'main' request page. For WRITE
* operations, we vm_page_t->busy'd unbusy all pages ( we can do this
* because we marked them all VM_PAGER_PEND on return from putpages ).

View File

@ -339,15 +339,11 @@ vmspace_dofree(struct vmspace *vm)
void
vmspace_free(struct vmspace *vm)
{
int refcnt;
if (vm->vm_refcnt == 0)
panic("vmspace_free: attempt to free already freed vmspace");
do
refcnt = vm->vm_refcnt;
while (!atomic_cmpset_int(&vm->vm_refcnt, refcnt, refcnt - 1));
if (refcnt == 1)
if (atomic_fetchadd_int(&vm->vm_refcnt, -1) == 1)
vmspace_dofree(vm);
}
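The replacement works because atomic_fetchadd_int(9) returns the value the
counter held before the decrement, so a result of 1 identifies the caller
that dropped the last reference and no compare-and-set retry loop is needed.
A minimal sketch of the same pattern on a hypothetical refcounted object
(obj and obj_destroy() stand in for vmspace and vmspace_dofree()):

#include <sys/types.h>
#include <machine/atomic.h>

struct obj {
	volatile u_int	refcnt;		/* number of references */
};

static void obj_destroy(struct obj *);	/* hypothetical destructor */

static void
obj_release(struct obj *o)
{

	/* Fetch-and-add returns the old value; 1 means we were last. */
	if (atomic_fetchadd_int(&o->refcnt, -1) == 1)
		obj_destroy(o);
}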

View File

@ -236,7 +236,7 @@ struct vmspace {
caddr_t vm_taddr; /* (c) user virtual address of text */
caddr_t vm_daddr; /* (c) user virtual address of data */
caddr_t vm_maxsaddr; /* user VA at max stack growth */
int vm_refcnt; /* number of references */
volatile int vm_refcnt; /* number of references */
/*
* Keep the PMAP last, so that CPU-specific variations of that
* structure on a single architecture don't result in offset

sys/xen/blkif.h (new file)
View File

@ -0,0 +1,145 @@
/*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* $FreeBSD$
*/
#ifndef __XEN_BLKIF_H__
#define __XEN_BLKIF_H__
#include <xen/interface/io/ring.h>
#include <xen/interface/io/blkif.h>
#include <xen/interface/io/protocols.h>
/* Not a real protocol. Used to generate ring structs which contain
* the elements common to all protocols only. This way we get a
* compiler-checkable way to use common struct elements, so we can
* avoid using switch(protocol) in a number of places. */
struct blkif_common_request {
char dummy;
};
struct blkif_common_response {
char dummy;
};
/* i386 protocol version */
#pragma pack(push, 4)
struct blkif_x86_32_request {
uint8_t operation; /* BLKIF_OP_??? */
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK];
};
struct blkif_x86_32_response {
uint64_t id; /* copied from request */
uint8_t operation; /* copied from request */
int16_t status; /* BLKIF_RSP_??? */
};
typedef struct blkif_x86_32_request blkif_x86_32_request_t;
typedef struct blkif_x86_32_response blkif_x86_32_response_t;
#pragma pack(pop)
/* x86_64 protocol version */
struct blkif_x86_64_request {
uint8_t operation; /* BLKIF_OP_??? */
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
uint64_t __attribute__((__aligned__(8))) id;
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK];
};
struct blkif_x86_64_response {
uint64_t __attribute__((__aligned__(8))) id;
uint8_t operation; /* copied from request */
int16_t status; /* BLKIF_RSP_??? */
};
typedef struct blkif_x86_64_request blkif_x86_64_request_t;
typedef struct blkif_x86_64_response blkif_x86_64_response_t;
DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response);
DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, struct blkif_x86_32_response);
DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, struct blkif_x86_64_response);
/*
* Maximum number of requests that can be active for a given instance
* regardless of the protocol in use, based on the ring size. This constant
* facilitates resource pre-allocation in backend drivers since the size is
* known well in advance of attaching to a front end.
*/
#define BLKIF_MAX_RING_REQUESTS(_sz) \
MAX(__RING_SIZE((blkif_x86_64_sring_t *)NULL, _sz), \
MAX(__RING_SIZE((blkif_x86_32_sring_t *)NULL, _sz), \
__RING_SIZE((blkif_sring_t *)NULL, _sz)))
/*
* The number of ring pages required to support a given number of requests
* for a given instance regardless of the protocol in use.
*/
#define BLKIF_RING_PAGES(_entries) \
MAX(__RING_PAGES((blkif_x86_64_sring_t *)NULL, _entries), \
MAX(__RING_PAGES((blkif_x86_32_sring_t *)NULL, _entries), \
__RING_PAGES((blkif_sring_t *)NULL, _entries)))
union blkif_back_rings {
blkif_back_ring_t native;
blkif_common_back_ring_t common;
blkif_x86_32_back_ring_t x86_32;
blkif_x86_64_back_ring_t x86_64;
};
typedef union blkif_back_rings blkif_back_rings_t;
enum blkif_protocol {
BLKIF_PROTOCOL_NATIVE = 1,
BLKIF_PROTOCOL_X86_32 = 2,
BLKIF_PROTOCOL_X86_64 = 3,
};
static void inline blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src)
{
int i, n = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK;
dst->operation = src->operation;
dst->nr_segments = src->nr_segments;
dst->handle = src->handle;
dst->id = src->id;
dst->sector_number = src->sector_number;
barrier();
if (n > dst->nr_segments)
n = dst->nr_segments;
for (i = 0; i < n; i++)
dst->seg[i] = src->seg[i];
}
static void inline blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src)
{
int i, n = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK;
dst->operation = src->operation;
dst->nr_segments = src->nr_segments;
dst->handle = src->handle;
dst->id = src->id;
dst->sector_number = src->sector_number;
barrier();
if (n > dst->nr_segments)
n = dst->nr_segments;
for (i = 0; i < n; i++)
dst->seg[i] = src->seg[i];
}
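These helpers are typically paired with the blkif_back_rings union above to
pull requests out of whichever layout was negotiated.  A hypothetical
dispatch sketch (blkif_get_req is an assumption, not part of this header):

static inline void
blkif_get_req(enum blkif_protocol proto, blkif_back_rings_t *rings,
    RING_IDX idx, blkif_request_t *dst)
{

	switch (proto) {
	case BLKIF_PROTOCOL_NATIVE:
		/* Identical layout: a plain structure copy suffices. */
		*dst = *RING_GET_REQUEST(&rings->native, idx);
		break;
	case BLKIF_PROTOCOL_X86_32:
		blkif_get_x86_32_req(dst,
		    RING_GET_REQUEST(&rings->x86_32, idx));
		break;
	case BLKIF_PROTOCOL_X86_64:
		blkif_get_x86_64_req(dst,
		    RING_GET_REQUEST(&rings->x86_64, idx));
		break;
	}
}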
#endif /* __XEN_BLKIF_H__ */

View File

@ -492,15 +492,15 @@ bind_listening_port_to_irqhandler(unsigned int remote_domain,
int
bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
unsigned int remote_port, const char *devname,
driver_filter_t filter, driver_intr_t handler,
unsigned long irqflags, unsigned int *irqp)
driver_intr_t handler, void *arg, unsigned long irqflags,
unsigned int *irqp)
{
unsigned int irq;
int error;
irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
intr_register_source(&xp->xp_pins[irq].xp_intsrc);
error = intr_add_handler(devname, irq, filter, handler, NULL,
error = intr_add_handler(devname, irq, NULL, handler, arg,
irqflags, &xp->xp_pins[irq].xp_cookie);
if (error) {
unbind_from_irq(irq);

View File

@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$");
/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
#define GNTTAB_LIST_END 0xffffffff
#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t))
static grant_ref_t **gnttab_list;
@ -66,7 +65,7 @@ get_free_entries(int count, int *entries)
{
int ref, error;
grant_ref_t head;
mtx_lock(&gnttab_list_lock);
if ((gnttab_free_count < count) &&
((error = gnttab_expand(count - gnttab_free_count)) != 0)) {
@ -79,7 +78,7 @@ get_free_entries(int count, int *entries)
head = gnttab_entry(head);
gnttab_free_head = gnttab_entry(head);
gnttab_entry(head) = GNTTAB_LIST_END;
mtx_unlock(&gnttab_list_lock);
mtx_unlock(&gnttab_list_lock);
*entries = ref;
return (0);
@ -122,7 +121,7 @@ put_free_entry(grant_ref_t ref)
gnttab_free_head = ref;
gnttab_free_count++;
check_free_callbacks();
mtx_unlock(&gnttab_list_lock);
mtx_unlock(&gnttab_list_lock);
}
/*
@ -136,7 +135,7 @@ gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly,
int error, ref;
error = get_free_entries(1, &ref);
if (unlikely(error))
return (error);
@ -166,9 +165,9 @@ int
gnttab_query_foreign_access(grant_ref_t ref)
{
uint16_t nflags;
nflags = shared[ref].flags;
return (nflags & (GTF_reading|GTF_writing));
}
@ -180,7 +179,7 @@ gnttab_end_foreign_access_ref(grant_ref_t ref)
nflags = shared[ref].flags;
do {
if ( (flags = nflags) & (GTF_reading|GTF_writing) ) {
printf("WARNING: g.e. still in use!\n");
printf("%s: WARNING: g.e. still in use!\n", __func__);
return (0);
}
} while ((nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) !=
@ -201,7 +200,44 @@ gnttab_end_foreign_access(grant_ref_t ref, void *page)
else {
/* XXX This needs to be fixed so that the ref and page are
placed on a list to be freed up later. */
printf("WARNING: leaking g.e. and page still in use!\n");
printf("%s: WARNING: leaking g.e. and page still in use!\n",
__func__);
}
}
void
gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs)
{
grant_ref_t *last_ref;
grant_ref_t head;
grant_ref_t tail;
head = GNTTAB_LIST_END;
tail = *refs;
last_ref = refs + count;
while (refs != last_ref) {
if (gnttab_end_foreign_access_ref(*refs)) {
gnttab_entry(*refs) = head;
head = *refs;
} else {
/*
* XXX This needs to be fixed so that the ref
* is placed on a list to be freed up later.
*/
printf("%s: WARNING: leaking g.e. still in use!\n",
__func__);
count--;
}
refs++;
}
if (count != 0) {
mtx_lock(&gnttab_list_lock);
gnttab_free_count += count;
gnttab_entry(tail) = gnttab_free_head;
gnttab_free_head = head;
mtx_unlock(&gnttab_list_lock);
}
}
@ -216,7 +252,7 @@ gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn,
return (error);
gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
*result = ref;
return (0);
}
@ -282,16 +318,16 @@ gnttab_free_grant_references(grant_ref_t head)
{
grant_ref_t ref;
int count = 1;
if (head == GNTTAB_LIST_END)
return;
mtx_lock(&gnttab_list_lock);
ref = head;
while (gnttab_entry(ref) != GNTTAB_LIST_END) {
ref = gnttab_entry(ref);
count++;
}
mtx_lock(&gnttab_list_lock);
gnttab_entry(ref) = gnttab_free_head;
gnttab_free_head = head;
gnttab_free_count += count;
@ -403,7 +439,7 @@ grow_gnttab_list(unsigned int more_frames)
check_free_callbacks();
return (0);
grow_nomem:
for ( ; i >= nr_grant_frames; i--)
free(gnttab_list[i], M_DEVBUF);
@ -490,7 +526,7 @@ gnttab_map(unsigned int start_idx, unsigned int end_idx)
if (shared == NULL) {
vm_offset_t area;
area = kmem_alloc_nofault(kernel_map,
PAGE_SIZE * max_nr_grant_frames());
KASSERT(area, ("can't allocate VM space for grant table"));
@ -502,7 +538,7 @@ gnttab_map(unsigned int start_idx, unsigned int end_idx)
((vm_paddr_t)frames[i]) << PAGE_SHIFT | PG_RW | PG_V);
free(frames, M_DEVBUF);
return (0);
}
@ -517,7 +553,7 @@ gnttab_resume(void)
int
gnttab_suspend(void)
{
{
int i;
for (i = 0; i < nr_grant_frames; i++)
@ -532,7 +568,8 @@ gnttab_suspend(void)
static vm_paddr_t resume_frames;
static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
static int
gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
struct xen_add_to_physmap xatp;
unsigned int i = end_idx;
@ -552,7 +589,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
if (shared == NULL) {
vm_offset_t area;
area = kmem_alloc_nofault(kernel_map,
PAGE_SIZE * max_nr_grant_frames());
KASSERT(area, ("can't allocate VM space for grant table"));
@ -643,10 +680,10 @@ gnttab_init()
if (gnttab_list[i] == NULL)
goto ini_nomem;
}
if (gnttab_resume())
return (ENODEV);
nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
@ -670,4 +707,3 @@ ini_nomem:
}
MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF);
//SYSINIT(gnttab, SI_SUB_PSEUDO, SI_ORDER_FIRST, gnttab_init, NULL);

View File

@ -43,6 +43,8 @@
#include <machine/xen/xen-os.h>
#include <xen/features.h>
#define GNTTAB_LIST_END GRANT_REF_INVALID
struct gnttab_free_callback {
struct gnttab_free_callback *next;
void (*fn)(void *);
@ -74,6 +76,13 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref);
*/
void gnttab_end_foreign_access(grant_ref_t ref, void *page);
/*
* Eventually end access through the given array of grant references.
* Access will be ended immediately iff the grant entry is not in use;
* otherwise it will happen some time later.
*/
void gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs);
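A hedged usage sketch for the batch interface; xdev_request and its fields
are hypothetical driver state:

struct xdev_request {
	u_int		nr_segments;	/* grants used by this request */
	grant_ref_t	gref[16];	/* hypothetical per-request grants */
};

static void
xdev_complete_request(struct xdev_request *req)
{

	/* Ends each grant now if unused; defers (and warns) otherwise. */
	gnttab_end_foreign_access_references(req->nr_segments, req->gref);
}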
int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn, grant_ref_t *result);
unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref);

View File

@ -159,6 +159,8 @@ typedef struct grant_entry grant_entry_t;
*/
typedef uint32_t grant_ref_t;
#define GRANT_REF_INVALID 0xffffffff
/*
* Handle to track a mapping created via a grant reference.
*/

View File

@ -95,4 +95,30 @@
#define HVM_NR_PARAMS 15
#ifdef XENHVM
/**
* Retrieve an HVM setting from the hypervisor.
*
* \param index The index of the HVM parameter to retrieve.
*
* \return On error, 0. Otherwise the value of the requested parameter.
*/
static inline unsigned long
hvm_get_parameter(int index)
{
struct xen_hvm_param xhv;
int error;
xhv.domid = DOMID_SELF;
xhv.index = index;
error = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
if (error) {
printf("hvm_get_parameter: failed to get %d, error %d\n",
index, error);
return (0);
}
return (xhv.value);
}
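For illustration, a typical caller starts from the page frame number kept in
HVM_PARAM_STORE_PFN (defined earlier in this file) to locate the XenStore
ring of an HVM guest; hvm_xenstore_paddr below is a hypothetical helper:

static inline vm_paddr_t
hvm_xenstore_paddr(void)
{

	/* hvm_get_parameter() returns 0 on failure. */
	return ((vm_paddr_t)hvm_get_parameter(HVM_PARAM_STORE_PFN) <<
	    PAGE_SHIFT);
}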
#endif
#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */

View File

@ -78,11 +78,19 @@
#define BLKIF_OP_FLUSH_DISKCACHE 3
/*
* Maximum scatter/gather segments per request.
* This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
* NB. This could be 12 if the ring indexes weren't stored in the same page.
* Maximum scatter/gather segments associated with a request header block.
*/
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
#define BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK 11
/*
* Maximum scatter/gather segments associated with a segment block.
*/
#define BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK 14
/*
* Maximum scatter/gather segments per request (header + segment blocks).
*/
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 255
struct blkif_request_segment {
grant_ref_t gref; /* reference to I/O buffer frame */
@ -90,6 +98,7 @@ struct blkif_request_segment {
/* @last_sect: last sector in frame to transfer (inclusive). */
uint8_t first_sect, last_sect;
};
typedef struct blkif_request_segment blkif_request_segment_t;
struct blkif_request {
uint8_t operation; /* BLKIF_OP_??? */
@ -97,7 +106,7 @@ struct blkif_request {
blkif_vdev_t handle; /* only for read/write requests */
uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK];
};
typedef struct blkif_request blkif_request_t;
@ -124,10 +133,22 @@ typedef struct blkif_response blkif_response_t;
DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
#define BLKRING_GET_SG_REQUEST(_r, _idx) \
((struct blkif_request_segment *)RING_GET_REQUEST(_r, _idx))
#define VDISK_CDROM 0x1
#define VDISK_REMOVABLE 0x2
#define VDISK_READONLY 0x4
/*
* The number of ring request blocks required to handle an I/O
* request containing _segs segments.
*/
#define BLKIF_SEGS_TO_BLOCKS(_segs) \
((((_segs - BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK) \
+ (BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK - 1)) \
/ BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK) + /*header_block*/1)
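An editorial worked example of the macro arithmetic:

/*
 * A maximal 255-segment request needs one header block carrying 11
 * segments plus ((255 - 11) + 13) / 14 = 18 segment blocks, so
 * BLKIF_SEGS_TO_BLOCKS(255) == 19, while an 11-segment request fits
 * in a single block: BLKIF_SEGS_TO_BLOCKS(11) == 1.
 */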
#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
/*

View File

@ -26,6 +26,7 @@
#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi"
#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi"
#define XEN_IO_PROTO_ABI_IA64 "ia64-abi"
#define XEN_IO_PROTO_ABI_POWERPC64 "powerpc64-abi"
#if defined(__i386__)
# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32
@ -33,6 +34,8 @@
# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64
#elif defined(__ia64__)
# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_IA64
#elif defined(__powerpc64__)
# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_POWERPC64
#else
# error arch fixup needed here
#endif

View File

@ -44,6 +44,12 @@ typedef unsigned int RING_IDX;
#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x))
#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x))
/*
* The amount of space reserved in the shared ring for accounting information.
*/
#define __RING_HEADER_SIZE(_s) \
((intptr_t)(_s)->ring - (intptr_t)(_s))
/*
* Calculate size of a shared ring, given the total available space for the
* ring and indexes (_sz), and the name tag of the request/response structure.
@ -51,7 +57,17 @@ typedef unsigned int RING_IDX;
* power of two (so we can mask with (size-1) to loop around).
*/
#define __RING_SIZE(_s, _sz) \
(__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
(__RD32(((_sz) - __RING_HEADER_SIZE(_s)) / sizeof((_s)->ring[0])))
/*
* The number of pages needed to support a given number of request/response
* entries. The entry count is rounded down to the nearest power of two
* as required by the ring macros.
*/
#define __RING_PAGES(_s, _entries) \
((__RING_HEADER_SIZE(_s) \
+ (__RD32(_entries) * sizeof((_s)->ring[0])) \
+ PAGE_SIZE - 1) / PAGE_SIZE)
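A worked illustration of the two macros, using assumed sizes (the 64-byte
header and 112-byte entry are hypothetical figures chosen to show the
rounding):

/*
 * On a 4096-byte page, __RING_SIZE yields (4096 - 64) / 112 = 36,
 * which __RD32 rounds down to 32 entries.  __RING_PAGES for 32
 * entries needs 64 + 32 * 112 = 3648 bytes -> 1 page, while 64
 * entries would need 64 + 64 * 112 = 7232 bytes -> 2 pages.
 */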
/*
* Macros to make the correct C datatypes for a new kind of ring.

View File

@ -36,6 +36,9 @@
enum xenbus_state {
XenbusStateUnknown = 0,
/*
* Initializing: Back-end is initializing.
*/
XenbusStateInitialising = 1,
/*
@ -49,6 +52,9 @@ enum xenbus_state {
*/
XenbusStateInitialised = 3,
/*
* Connected: The normal state for a front to backend connection.
*/
XenbusStateConnected = 4,
/*
@ -56,6 +62,9 @@ enum xenbus_state {
*/
XenbusStateClosing = 5,
/*
* Closed: No connection exists between front and back end.
*/
XenbusStateClosed = 6,
/*

View File

@ -1,266 +0,0 @@
/*
*
* Copyright (c) 2004 Christian Limpach.
* Copyright (c) 2004-2006,2008 Kip Macy
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Christian Limpach.
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>
#include <machine/xen/xen-os.h>
#include <xen/hypervisor.h>
#include <xen/gnttab.h>
#include <xen/xen_intr.h>
#include <xen/xenbus/xenbusvar.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#ifdef XENHVM
#include <dev/xen/xenpci/xenpcivar.h>
#else
static void xen_suspend(void);
#endif
static void
shutdown_handler(struct xenbus_watch *watch,
const char **vec, unsigned int len)
{
char *str;
struct xenbus_transaction xbt;
int error, howto;
howto = 0;
again:
error = xenbus_transaction_start(&xbt);
if (error)
return;
error = xenbus_read(xbt, "control", "shutdown", NULL, (void **) &str);
/* Ignore read errors and empty reads. */
if (error || strlen(str) == 0) {
xenbus_transaction_end(xbt, 1);
return;
}
xenbus_write(xbt, "control", "shutdown", "");
error = xenbus_transaction_end(xbt, 0);
if (error == EAGAIN) {
free(str, M_DEVBUF);
goto again;
}
if (strcmp(str, "reboot") == 0)
howto = 0;
else if (strcmp(str, "poweroff") == 0)
howto |= (RB_POWEROFF | RB_HALT);
else if (strcmp(str, "halt") == 0)
#ifdef XENHVM
/*
* We rely on acpi powerdown to halt the VM.
*/
howto |= (RB_POWEROFF | RB_HALT);
#else
howto |= RB_HALT;
#endif
else if (strcmp(str, "suspend") == 0)
howto = -1;
else {
printf("Ignoring shutdown request: %s\n", str);
goto done;
}
if (howto == -1) {
xen_suspend();
goto done;
}
shutdown_nice(howto);
done:
free(str, M_DEVBUF);
}
#ifndef XENHVM
/*
* In HV mode, we let acpi take care of halts and reboots.
*/
static void
xen_shutdown_final(void *arg, int howto)
{
if (howto & (RB_HALT | RB_POWEROFF))
HYPERVISOR_shutdown(SHUTDOWN_poweroff);
else
HYPERVISOR_shutdown(SHUTDOWN_reboot);
}
#endif
static struct xenbus_watch shutdown_watch = {
.node = "control/shutdown",
.callback = shutdown_handler
};
static void
setup_shutdown_watcher(void *unused)
{
if (register_xenbus_watch(&shutdown_watch))
printf("Failed to set shutdown watcher\n");
#ifndef XENHVM
EVENTHANDLER_REGISTER(shutdown_final, xen_shutdown_final, NULL,
SHUTDOWN_PRI_LAST);
#endif
}
SYSINIT(shutdown, SI_SUB_PSEUDO, SI_ORDER_ANY, setup_shutdown_watcher, NULL);
#ifndef XENHVM
extern void xencons_suspend(void);
extern void xencons_resume(void);
static void
xen_suspend()
{
int i, j, k, fpp;
unsigned long max_pfn, start_info_mfn;
#ifdef SMP
cpumask_t map;
/*
* Bind us to CPU 0 and stop any other VCPUs.
*/
thread_lock(curthread);
sched_bind(curthread, 0);
thread_unlock(curthread);
KASSERT(PCPU_GET(cpuid) == 0, ("xen_suspend: not running on cpu 0"));
map = PCPU_GET(other_cpus) & ~stopped_cpus;
if (map)
stop_cpus(map);
#endif
if (DEVICE_SUSPEND(root_bus) != 0) {
printf("xen_suspend: device_suspend failed\n");
#ifdef SMP
if (map)
restart_cpus(map);
#endif
return;
}
local_irq_disable();
xencons_suspend();
gnttab_suspend();
max_pfn = HYPERVISOR_shared_info->arch.max_pfn;
void *shared_info = HYPERVISOR_shared_info;
HYPERVISOR_shared_info = NULL;
pmap_kremove((vm_offset_t) shared_info);
PT_UPDATES_FLUSH();
xen_start_info->store_mfn = MFNTOPFN(xen_start_info->store_mfn);
xen_start_info->console.domU.mfn = MFNTOPFN(xen_start_info->console.domU.mfn);
/*
* We'll stop somewhere inside this hypercall. When it returns,
* we'll start resuming after the restore.
*/
start_info_mfn = VTOMFN(xen_start_info);
pmap_suspend();
HYPERVISOR_suspend(start_info_mfn);
pmap_resume();
pmap_kenter_ma((vm_offset_t) shared_info, xen_start_info->shared_info);
HYPERVISOR_shared_info = shared_info;
HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
VTOMFN(xen_pfn_to_mfn_frame_list_list);
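/*
 * Rebuild the two-level pfn-to-mfn frame list so the tools can locate
 * the p2m table in the restored domain; fpp is the number of list
 * entries that fit in one page.
 */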
fpp = PAGE_SIZE/sizeof(unsigned long);
for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) {
if ((j % fpp) == 0) {
k++;
xen_pfn_to_mfn_frame_list_list[k] =
VTOMFN(xen_pfn_to_mfn_frame_list[k]);
j = 0;
}
xen_pfn_to_mfn_frame_list[k][j] =
VTOMFN(&xen_phys_machine[i]);
}
HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
gnttab_resume();
irq_resume();
local_irq_enable();
xencons_resume();
#ifdef CONFIG_SMP
for_each_cpu(i)
vcpu_prepare(i);
#endif
/*
* Only resume xenbus /after/ we've prepared our VCPUs; otherwise
* the VCPU hotplug callback can race with our vcpu_prepare
*/
DEVICE_RESUME(root_bus);
#ifdef SMP
thread_lock(curthread);
sched_unbind(curthread);
thread_unlock(curthread);
if (map)
restart_cpus(map);
#endif
}
#endif

View File

@@ -76,7 +76,7 @@ extern int bind_ipi_to_irqhandler(unsigned int ipi, unsigned int cpu,
*/
extern int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
unsigned int remote_port, const char *devname,
driver_filter_t filter, driver_intr_t handler,
driver_intr_t handler, void *arg,
unsigned long irqflags, unsigned int *irqp);
/*

View File

@@ -1,14 +0,0 @@
- frontend driver initializes static xenbus_driver with _ids, _probe, _remove,
_resume, _otherend_changed
- initialization calls xenbus_register_frontend(xenbus_driver)
- xenbus_register_frontend sets read_otherend details to read_backend_details
then calls xenbus_register_driver_common(xenbus_driver, xenbus_frontend)
- xenbus_register_driver_common sets underlying driver name to xenbus_driver name
underlying driver bus to xenbus_frontend's bus, driver's probe to xenbus_dev_probe
driver's remove to xenbus_dev_remove then calls driver_register
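For illustration, the registration flow described above might look like this
in a front-end driver (a minimal sketch; the type layout and the xf_* names
are hypothetical, following the Linux-derived xenbus_driver structure):

	static struct xenbus_device_id xf_ids[] = { { "vif" }, { "" } };

	static struct xenbus_driver xf_driver = {
		.ids              = xf_ids,
		.probe            = xf_probe,
		.remove           = xf_remove,
		.resume           = xf_resume,
		.otherend_changed = xf_backend_changed,
	};

	static int
	xf_init(void)
	{
		/* Sets read_otherend_details to read_backend_details and
		 * registers the driver on the frontend bus. */
		return (xenbus_register_frontend(&xf_driver));
	}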

View File

@@ -1,8 +1,4 @@
/******************************************************************************
* Client-facing interface for the Xenbus driver. In other words, the
* interface between the Xenbus and the device-specific code, be it the
* frontend or the backend of that driver.
*
* Copyright (C) 2005 XenSource Ltd
*
* This file may be distributed separately from the Linux kernel, or
@@ -27,6 +23,14 @@
* IN THE SOFTWARE.
*/
/**
* \file xenbus.c
*
* \brief Client-facing interface for the Xenbus driver.
*
* In other words, the interface between the Xenbus and the device-specific
* code, be it the frontend or the backend of that driver.
*/
#if 0
#define DPRINTK(fmt, args...) \
@@ -39,9 +43,12 @@
__FBSDID("$FreeBSD$");
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/libkern.h>
#include <sys/sbuf.h>
#include <machine/xen/xen-os.h>
#include <xen/hypervisor.h>
@@ -50,6 +57,34 @@ __FBSDID("$FreeBSD$");
#include <xen/xenbus/xenbusvar.h>
#include <machine/stdarg.h>
MALLOC_DEFINE(M_XENBUS, "xenbus", "XenBus Support");
/*------------------------- Private Functions --------------------------------*/
/**
* \brief Construct the error path corresponding to the given XenBus
* device.
*
* \param dev The XenBus device for which we are constructing an error path.
*
* \return On success, the constructed error path. Otherwise NULL.
*
* It is the caller's responsibility to free any returned error path
* node using the M_XENBUS malloc type.
*/
static char *
error_path(device_t dev)
{
char *path_buffer = malloc(strlen("error/")
+ strlen(xenbus_get_node(dev)) + 1, M_XENBUS, M_WAITOK);
strcpy(path_buffer, "error/");
strcpy(path_buffer + strlen("error/"), xenbus_get_node(dev));
return (path_buffer);
}
/*--------------------------- Public Functions -------------------------------*/
/*-------- API comments for these methods can be found in xenbusvar.h --------*/
const char *
xenbus_strstate(XenbusState state)
{
@@ -67,15 +102,15 @@ xenbus_strstate(XenbusState state)
}
int
xenbus_watch_path(device_t dev, char *path, struct xenbus_watch *watch,
void (*callback)(struct xenbus_watch *, const char **, unsigned int))
xenbus_watch_path(device_t dev, char *path, struct xs_watch *watch,
xs_watch_cb_t *callback)
{
int error;
watch->node = path;
watch->callback = callback;
error = register_xenbus_watch(watch);
error = xs_register_watch(watch);
if (error) {
watch->node = NULL;
@@ -88,12 +123,12 @@ xenbus_watch_path(device_t dev, char *path, struct xenbus_watch *watch,
int
xenbus_watch_path2(device_t dev, const char *path,
const char *path2, struct xenbus_watch *watch,
void (*callback)(struct xenbus_watch *, const char **, unsigned int))
const char *path2, struct xs_watch *watch,
xs_watch_cb_t *callback)
{
int error;
char *state = malloc(strlen(path) + 1 + strlen(path2) + 1,
M_DEVBUF, M_WAITOK);
M_XENBUS, M_WAITOK);
strcpy(state, path);
strcat(state, "/");
@@ -101,46 +136,27 @@ xenbus_watch_path2(device_t dev, const char *path,
error = xenbus_watch_path(dev, state, watch, callback);
if (error) {
free(state, M_DEVBUF);
free(state, M_XENBUS);
}
return (error);
}
/**
* Return the path to the error node for the given device, or NULL on failure.
* If the value returned is non-NULL, then it is the caller's to kfree.
*/
static char *
error_path(device_t dev)
{
char *path_buffer = malloc(strlen("error/")
+ strlen(xenbus_get_node(dev)) + 1, M_DEVBUF, M_WAITOK);
strcpy(path_buffer, "error/");
strcpy(path_buffer + strlen("error/"), xenbus_get_node(dev));
return (path_buffer);
}
static void
_dev_error(device_t dev, int err, const char *fmt, va_list ap)
void
xenbus_dev_verror(device_t dev, int err, const char *fmt, va_list ap)
{
int ret;
unsigned int len;
char *printf_buffer = NULL, *path_buffer = NULL;
#define PRINTF_BUFFER_SIZE 4096
printf_buffer = malloc(PRINTF_BUFFER_SIZE, M_DEVBUF, M_WAITOK);
printf_buffer = malloc(PRINTF_BUFFER_SIZE, M_XENBUS, M_WAITOK);
len = sprintf(printf_buffer, "%i ", err);
ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);
KASSERT(len + ret <= PRINTF_BUFFER_SIZE-1, ("xenbus error message too big"));
#if 0
dev_err(&dev->dev, "%s\n", printf_buffer);
#endif
device_printf(dev, "Error %s\n", printf_buffer);
path_buffer = error_path(dev);
if (path_buffer == NULL) {
@@ -149,7 +165,7 @@ _dev_error(device_t dev, int err, const char *fmt, va_list ap)
goto fail;
}
if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) {
if (xs_write(XST_NIL, path_buffer, "error", printf_buffer) != 0) {
printf("xenbus: failed to write error node for %s (%s)\n",
xenbus_get_node(dev), printf_buffer);
goto fail;
@@ -157,9 +173,9 @@ _dev_error(device_t dev, int err, const char *fmt, va_list ap)
fail:
if (printf_buffer)
free(printf_buffer, M_DEVBUF);
free(printf_buffer, M_XENBUS);
if (path_buffer)
free(path_buffer, M_DEVBUF);
free(path_buffer, M_XENBUS);
}
void
@@ -168,41 +184,45 @@ xenbus_dev_error(device_t dev, int err, const char *fmt, ...)
va_list ap;
va_start(ap, fmt);
_dev_error(dev, err, fmt, ap);
xenbus_dev_verror(dev, err, fmt, ap);
va_end(ap);
}
void
xenbus_dev_vfatal(device_t dev, int err, const char *fmt, va_list ap)
{
xenbus_dev_verror(dev, err, fmt, ap);
device_printf(dev, "Fatal error. Transitioning to Closing State\n");
xenbus_set_state(dev, XenbusStateClosing);
}
void
xenbus_dev_fatal(device_t dev, int err, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
_dev_error(dev, err, fmt, ap);
xenbus_dev_vfatal(dev, err, fmt, ap);
va_end(ap);
xenbus_set_state(dev, XenbusStateClosing);
}
int
xenbus_grant_ring(device_t dev, unsigned long ring_mfn, int *refp)
xenbus_grant_ring(device_t dev, unsigned long ring_mfn, grant_ref_t *refp)
{
int error;
grant_ref_t ref;
error = gnttab_grant_foreign_access(
xenbus_get_otherend_id(dev), ring_mfn, 0, &ref);
xenbus_get_otherend_id(dev), ring_mfn, 0, refp);
if (error) {
xenbus_dev_fatal(dev, error, "granting access to ring page");
return (error);
}
*refp = ref;
return (0);
}
int
xenbus_alloc_evtchn(device_t dev, int *port)
xenbus_alloc_evtchn(device_t dev, evtchn_port_t *port)
{
struct evtchn_alloc_unbound alloc_unbound;
int err;
@@ -222,7 +242,7 @@ xenbus_alloc_evtchn(device_t dev, int *port)
}
int
xenbus_free_evtchn(device_t dev, int port)
xenbus_free_evtchn(device_t dev, evtchn_port_t port)
{
struct evtchn_close close;
int err;
@@ -240,12 +260,29 @@ xenbus_free_evtchn(device_t dev, int port)
XenbusState
xenbus_read_driver_state(const char *path)
{
XenbusState result;
int error;
XenbusState result;
int error;
error = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL);
if (error)
result = XenbusStateClosed;
error = xs_gather(XST_NIL, path, "state", "%d", &result, NULL);
if (error)
result = XenbusStateClosed;
return (result);
return (result);
}
int
xenbus_dev_is_online(device_t dev)
{
const char *path;
int error;
int value;
path = xenbus_get_node(dev);
error = xs_gather(XST_NIL, path, "online", "%d", &value, NULL);
if (error != 0) {
/* Default to not online. */
value = 0;
}
return (value);
}
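A back-end driver might consult this when deciding how to react to its front
end closing; a minimal sketch (the surrounding xbb_frontend_closed function
is hypothetical):

	static void
	xbb_frontend_closed(device_t dev)
	{
		/* If the toolstack no longer advertises this device as
		 * online, no reconnect is coming: go all the way to
		 * Closed rather than waiting for a new front end. */
		if (!xenbus_dev_is_online(dev))
			xenbus_set_state(dev, XenbusStateClosed);
	}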

View File

@@ -1,226 +0,0 @@
/******************************************************************************
* xenbus_comms.c
*
* Low level code to talk to the Xen Store: ring buffer and event channel.
*
* Copyright (C) 2005 Rusty Russell, IBM Corporation
*
* This file may be distributed separately from the Linux kernel, or
* incorporated into other software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/syslog.h>
#include <machine/xen/xen-os.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
#include <xen/interface/io/xs_wire.h>
#include <xen/xenbus/xenbus_comms.h>
static unsigned int xenstore_irq;
static inline struct xenstore_domain_interface *
xenstore_domain_interface(void)
{
return (struct xenstore_domain_interface *)xen_store;
}
static void
xb_intr(void * arg __attribute__((unused)))
{
wakeup(xen_store);
}
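/*
 * Validate ring indexes read from shared memory; a producer/consumer
 * difference larger than the ring size means the other end has
 * corrupted them.
 */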
static int
xb_check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
{
return ((prod - cons) <= XENSTORE_RING_SIZE);
}
static void *
xb_get_output_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
char *buf, uint32_t *len)
{
*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
*len = XENSTORE_RING_SIZE - (prod - cons);
return (buf + MASK_XENSTORE_IDX(prod));
}
static const void *
xb_get_input_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
const char *buf, uint32_t *len)
{
*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
if ((prod - cons) < *len)
*len = prod - cons;
return (buf + MASK_XENSTORE_IDX(cons));
}
int
xb_write(const void *tdata, unsigned len, struct lock_object *lock)
{
struct xenstore_domain_interface *intf = xenstore_domain_interface();
XENSTORE_RING_IDX cons, prod;
const char *data = (const char *)tdata;
int error;
while (len != 0) {
void *dst;
unsigned int avail;
while ((intf->req_prod - intf->req_cons)
== XENSTORE_RING_SIZE) {
error = _sleep(intf,
lock,
PCATCH, "xbwrite", hz/10);
if (error && error != EWOULDBLOCK)
return (error);
}
/* Read indexes, then verify. */
cons = intf->req_cons;
prod = intf->req_prod;
mb();
if (!xb_check_indexes(cons, prod)) {
intf->req_cons = intf->req_prod = 0;
return (EIO);
}
dst = xb_get_output_chunk(cons, prod, intf->req, &avail);
if (avail == 0)
continue;
if (avail > len)
avail = len;
mb();
memcpy(dst, data, avail);
data += avail;
len -= avail;
/* Other side must not see new header until data is there. */
wmb();
intf->req_prod += avail;
/* This implies mb() before other side sees interrupt. */
notify_remote_via_evtchn(xen_store_evtchn);
}
return (0);
}
int
xb_read(void *tdata, unsigned len, struct lock_object *lock)
{
struct xenstore_domain_interface *intf = xenstore_domain_interface();
XENSTORE_RING_IDX cons, prod;
char *data = (char *)tdata;
int error;
while (len != 0) {
unsigned int avail;
const char *src;
while (intf->rsp_cons == intf->rsp_prod) {
error = _sleep(intf, lock,
PCATCH, "xbread", hz/10);
if (error && error != EWOULDBLOCK)
return (error);
}
/* Read indexes, then verify. */
cons = intf->rsp_cons;
prod = intf->rsp_prod;
if (!xb_check_indexes(cons, prod)) {
intf->rsp_cons = intf->rsp_prod = 0;
return (EIO);
}
src = xb_get_input_chunk(cons, prod, intf->rsp, &avail);
if (avail == 0)
continue;
if (avail > len)
avail = len;
/* We must read header before we read data. */
rmb();
memcpy(data, src, avail);
data += avail;
len -= avail;
/* Other side must not see free space until we've copied out */
mb();
intf->rsp_cons += avail;
/* Implies mb(): they will see new header. */
notify_remote_via_evtchn(xen_store_evtchn);
}
return (0);
}
/* Set up interrupt handler off store event channel. */
int
xb_init_comms(void)
{
struct xenstore_domain_interface *intf = xenstore_domain_interface();
int error;
if (intf->rsp_prod != intf->rsp_cons) {
log(LOG_WARNING, "XENBUS response ring is not quiescent "
"(%08x:%08x): fixing up\n",
intf->rsp_cons, intf->rsp_prod);
intf->rsp_cons = intf->rsp_prod;
}
if (xenstore_irq)
unbind_from_irqhandler(xenstore_irq);
error = bind_caller_port_to_irqhandler(
xen_store_evtchn, "xenbus",
xb_intr, NULL, INTR_TYPE_NET, &xenstore_irq);
if (error) {
log(LOG_WARNING, "XENBUS request irq failed %i\n", error);
return (error);
}
return (0);
}

View File

@@ -1,48 +0,0 @@
/*
* Private include for xenbus communications.
*
* Copyright (C) 2005 Rusty Russell, IBM Corporation
*
* This file may be distributed separately from the Linux kernel, or
* incorporated into other software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* $FreeBSD$
*/
#ifndef _XENBUS_COMMS_H
#define _XENBUS_COMMS_H
struct sx;
extern int xen_store_evtchn;
extern char *xen_store;
int xs_init(void);
int xb_init_comms(void);
/* Low level routines. */
int xb_write(const void *data, unsigned len, struct lock_object *);
int xb_read(void *data, unsigned len, struct lock_object *);
extern int xenbus_running;
char *kasprintf(const char *fmt, ...);
#endif /* _XENBUS_COMMS_H */

View File

@@ -31,7 +31,15 @@
INTERFACE xenbus;
METHOD int backend_changed {
device_t dev;
enum xenbus_state newstate;
/**
* \brief Callback triggered when the state of the otherend
* of a split device changes.
*
* \param _dev NewBus device_t for this XenBus device whose otherend's
* state has changed.
* \param _newstate The new state of the otherend device.
*/
METHOD int otherend_changed {
device_t _dev;
enum xenbus_state _newstate;
};
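A driver supplies this method through its NewBus method table; for example
(a sketch, with a hypothetical handler name):

	static int
	xn_otherend_changed(device_t dev, enum xenbus_state newstate)
	{
		/* React to the peer's state transition here. */
		return (0);
	}

	/* In the driver's device_method_t array: */
	DEVMETHOD(xenbus_otherend_changed, xn_otherend_changed),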

View File

@@ -1,602 +0,0 @@
/******************************************************************************
* Talks to Xen Store to figure out what devices we have.
*
* Copyright (C) 2008 Doug Rabson
* Copyright (C) 2005 Rusty Russell, IBM Corporation
* Copyright (C) 2005 Mike Wray, Hewlett-Packard
* Copyright (C) 2005 XenSource Ltd
*
* This file may be distributed separately from the Linux kernel, or
* incorporated into other software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#if 0
#define DPRINTK(fmt, args...) \
printf("xenbus_probe (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTK(fmt, args...) ((void)0)
#endif
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>
#include <machine/xen/xen-os.h>
#include <machine/stdarg.h>
#include <xen/gnttab.h>
#include <xen/xenbus/xenbusvar.h>
#include <xen/xenbus/xenbus_comms.h>
struct xenbus_softc {
struct xenbus_watch xs_devicewatch;
struct task xs_probechildren;
struct intr_config_hook xs_attachcb;
device_t xs_dev;
};
struct xenbus_device_ivars {
struct xenbus_watch xd_otherend_watch; /* must be first */
struct sx xd_lock;
device_t xd_dev;
char *xd_node; /* node name in xenstore */
char *xd_type; /* xen device type */
enum xenbus_state xd_state;
int xd_otherend_id;
char *xd_otherend_path;
};
/* Simplified asprintf. */
char *
kasprintf(const char *fmt, ...)
{
va_list ap;
unsigned int len;
char *p, dummy[1];
va_start(ap, fmt);
/* FIXME: vsnprintf has a bug, NULL should work */
len = vsnprintf(dummy, 0, fmt, ap);
va_end(ap);
p = malloc(len + 1, M_DEVBUF, M_WAITOK);
va_start(ap, fmt);
vsprintf(p, fmt, ap);
va_end(ap);
return p;
}
static void
xenbus_identify(driver_t *driver, device_t parent)
{
BUS_ADD_CHILD(parent, 0, "xenbus", 0);
}
static int
xenbus_probe(device_t dev)
{
int err = 0;
DPRINTK("");
/* Initialize the interface to xenstore. */
err = xs_init();
if (err) {
log(LOG_WARNING,
"XENBUS: Error initializing xenstore comms: %i\n", err);
return (ENXIO);
}
err = gnttab_init();
if (err) {
log(LOG_WARNING,
"XENBUS: Error initializing grant table: %i\n", err);
return (ENXIO);
}
device_set_desc(dev, "Xen Devices");
return (0);
}
static enum xenbus_state
xenbus_otherend_state(struct xenbus_device_ivars *ivars)
{
return (xenbus_read_driver_state(ivars->xd_otherend_path));
}
static void
xenbus_backend_changed(struct xenbus_watch *watch, const char **vec,
unsigned int len)
{
struct xenbus_device_ivars *ivars;
device_t dev;
enum xenbus_state newstate;
ivars = (struct xenbus_device_ivars *) watch;
dev = ivars->xd_dev;
if (!ivars->xd_otherend_path
|| strncmp(ivars->xd_otherend_path, vec[XS_WATCH_PATH],
strlen(ivars->xd_otherend_path)))
return;
newstate = xenbus_otherend_state(ivars);
XENBUS_BACKEND_CHANGED(dev, newstate);
}
static int
xenbus_device_exists(device_t dev, const char *node)
{
device_t *kids;
struct xenbus_device_ivars *ivars;
int i, count, result;
if (device_get_children(dev, &kids, &count))
return (FALSE);
result = FALSE;
for (i = 0; i < count; i++) {
ivars = device_get_ivars(kids[i]);
if (!strcmp(ivars->xd_node, node)) {
result = TRUE;
break;
}
}
free(kids, M_TEMP);
return (result);
}
static int
xenbus_add_device(device_t dev, const char *bus,
const char *type, const char *id)
{
device_t child;
struct xenbus_device_ivars *ivars;
enum xenbus_state state;
char *statepath;
int error;
ivars = malloc(sizeof(struct xenbus_device_ivars),
M_DEVBUF, M_ZERO|M_WAITOK);
ivars->xd_node = kasprintf("%s/%s/%s", bus, type, id);
if (xenbus_device_exists(dev, ivars->xd_node)) {
/*
* We are already tracking this node
*/
free(ivars->xd_node, M_DEVBUF);
free(ivars, M_DEVBUF);
return (0);
}
state = xenbus_read_driver_state(ivars->xd_node);
if (state != XenbusStateInitialising) {
/*
* Device is not new, so ignore it. This can
* happen if a device is going away after
* switching to Closed.
*/
free(ivars->xd_node, M_DEVBUF);
free(ivars, M_DEVBUF);
return (0);
}
/*
* Find the backend details
*/
error = xenbus_gather(XBT_NIL, ivars->xd_node,
"backend-id", "%i", &ivars->xd_otherend_id,
"backend", NULL, &ivars->xd_otherend_path,
NULL);
if (error)
return (error);
sx_init(&ivars->xd_lock, "xdlock");
ivars->xd_type = strdup(type, M_DEVBUF);
ivars->xd_state = XenbusStateInitialising;
statepath = malloc(strlen(ivars->xd_otherend_path)
+ strlen("/state") + 1, M_DEVBUF, M_WAITOK);
sprintf(statepath, "%s/state", ivars->xd_otherend_path);
ivars->xd_otherend_watch.node = statepath;
ivars->xd_otherend_watch.callback = xenbus_backend_changed;
child = device_add_child(dev, NULL, -1);
ivars->xd_dev = child;
device_set_ivars(child, ivars);
return (0);
}
static int
xenbus_enumerate_type(device_t dev, const char *bus, const char *type)
{
char **dir;
unsigned int i, count;
int error;
error = xenbus_directory(XBT_NIL, bus, type, &count, &dir);
if (error)
return (error);
for (i = 0; i < count; i++)
xenbus_add_device(dev, bus, type, dir[i]);
free(dir, M_DEVBUF);
return (0);
}
static int
xenbus_enumerate_bus(device_t dev, const char *bus)
{
char **dir;
unsigned int i, count;
int error;
error = xenbus_directory(XBT_NIL, bus, "", &count, &dir);
if (error)
return (error);
for (i = 0; i < count; i++) {
xenbus_enumerate_type(dev, bus, dir[i]);
}
free(dir, M_DEVBUF);
return (0);
}
static int
xenbus_probe_children(device_t dev)
{
device_t *kids;
struct xenbus_device_ivars *ivars;
int i, count;
/*
* Probe any new devices and register watches for any that
* attach successfully. Since part of the protocol which
* establishes a connection with the other end is interrupt
* driven, we sleep until the device reaches a stable state
* (closed or connected).
*/
if (device_get_children(dev, &kids, &count) == 0) {
for (i = 0; i < count; i++) {
if (device_get_state(kids[i]) != DS_NOTPRESENT)
continue;
if (device_probe_and_attach(kids[i]))
continue;
ivars = device_get_ivars(kids[i]);
register_xenbus_watch(
&ivars->xd_otherend_watch);
sx_xlock(&ivars->xd_lock);
while (ivars->xd_state != XenbusStateClosed
&& ivars->xd_state != XenbusStateConnected)
sx_sleep(&ivars->xd_state, &ivars->xd_lock,
0, "xdattach", 0);
sx_xunlock(&ivars->xd_lock);
}
free(kids, M_TEMP);
}
return (0);
}
static void
xenbus_probe_children_cb(void *arg, int pending)
{
device_t dev = (device_t) arg;
xenbus_probe_children(dev);
}
static void
xenbus_devices_changed(struct xenbus_watch *watch,
const char **vec, unsigned int len)
{
struct xenbus_softc *sc = (struct xenbus_softc *) watch;
device_t dev = sc->xs_dev;
char *node, *bus, *type, *id, *p;
node = strdup(vec[XS_WATCH_PATH], M_DEVBUF);
p = strchr(node, '/');
if (!p)
goto out;
bus = node;
*p = 0;
type = p + 1;
p = strchr(type, '/');
if (!p)
goto out;
*p = 0;
id = p + 1;
p = strchr(id, '/');
if (p)
*p = 0;
xenbus_add_device(dev, bus, type, id);
taskqueue_enqueue(taskqueue_thread, &sc->xs_probechildren);
out:
free(node, M_DEVBUF);
}
static void
xenbus_attach_deferred(void *arg)
{
device_t dev = (device_t) arg;
struct xenbus_softc *sc = device_get_softc(dev);
int error;
error = xenbus_enumerate_bus(dev, "device");
if (error)
return;
xenbus_probe_children(dev);
sc->xs_dev = dev;
sc->xs_devicewatch.node = "device";
sc->xs_devicewatch.callback = xenbus_devices_changed;
TASK_INIT(&sc->xs_probechildren, 0, xenbus_probe_children_cb, dev);
register_xenbus_watch(&sc->xs_devicewatch);
config_intrhook_disestablish(&sc->xs_attachcb);
}
static int
xenbus_attach(device_t dev)
{
struct xenbus_softc *sc = device_get_softc(dev);
sc->xs_attachcb.ich_func = xenbus_attach_deferred;
sc->xs_attachcb.ich_arg = dev;
config_intrhook_establish(&sc->xs_attachcb);
return (0);
}
static int
xenbus_suspend(device_t dev)
{
int error;
DPRINTK("");
error = bus_generic_suspend(dev);
if (error)
return (error);
xs_suspend();
return (0);
}
static int
xenbus_resume(device_t dev)
{
device_t *kids;
struct xenbus_device_ivars *ivars;
int i, count, error;
char *statepath;
xb_init_comms();
xs_resume();
/*
* We must re-examine each device and find the new path for
* its backend.
*/
if (device_get_children(dev, &kids, &count) == 0) {
for (i = 0; i < count; i++) {
if (device_get_state(kids[i]) == DS_NOTPRESENT)
continue;
ivars = device_get_ivars(kids[i]);
unregister_xenbus_watch(
&ivars->xd_otherend_watch);
ivars->xd_state = XenbusStateInitialising;
/*
* Find the new backend details and
* re-register our watch.
*/
free(ivars->xd_otherend_path, M_DEVBUF);
error = xenbus_gather(XBT_NIL, ivars->xd_node,
"backend-id", "%i", &ivars->xd_otherend_id,
"backend", NULL, &ivars->xd_otherend_path,
NULL);
if (error)
return (error);
DEVICE_RESUME(kids[i]);
statepath = malloc(strlen(ivars->xd_otherend_path)
+ strlen("/state") + 1, M_DEVBUF, M_WAITOK);
sprintf(statepath, "%s/state", ivars->xd_otherend_path);
free(ivars->xd_otherend_watch.node, M_DEVBUF);
ivars->xd_otherend_watch.node = statepath;
register_xenbus_watch(
&ivars->xd_otherend_watch);
#if 0
/*
* Can't do this yet since we are running in
* the xenwatch thread and if we sleep here,
* we will stop delivering watch notifications
* and the device will never come back online.
*/
sx_xlock(&ivars->xd_lock);
while (ivars->xd_state != XenbusStateClosed
&& ivars->xd_state != XenbusStateConnected)
sx_sleep(&ivars->xd_state, &ivars->xd_lock,
0, "xdresume", 0);
sx_xunlock(&ivars->xd_lock);
#endif
}
free(kids, M_TEMP);
}
return (0);
}
static int
xenbus_print_child(device_t dev, device_t child)
{
struct xenbus_device_ivars *ivars = device_get_ivars(child);
int retval = 0;
retval += bus_print_child_header(dev, child);
retval += printf(" at %s", ivars->xd_node);
retval += bus_print_child_footer(dev, child);
return (retval);
}
static int
xenbus_read_ivar(device_t dev, device_t child, int index,
uintptr_t * result)
{
struct xenbus_device_ivars *ivars = device_get_ivars(child);
switch (index) {
case XENBUS_IVAR_NODE:
*result = (uintptr_t) ivars->xd_node;
return (0);
case XENBUS_IVAR_TYPE:
*result = (uintptr_t) ivars->xd_type;
return (0);
case XENBUS_IVAR_STATE:
*result = (uintptr_t) ivars->xd_state;
return (0);
case XENBUS_IVAR_OTHEREND_ID:
*result = (uintptr_t) ivars->xd_otherend_id;
return (0);
case XENBUS_IVAR_OTHEREND_PATH:
*result = (uintptr_t) ivars->xd_otherend_path;
return (0);
}
return (ENOENT);
}
static int
xenbus_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
{
struct xenbus_device_ivars *ivars = device_get_ivars(child);
enum xenbus_state newstate;
int currstate;
int error;
switch (index) {
case XENBUS_IVAR_STATE:
newstate = (enum xenbus_state) value;
sx_xlock(&ivars->xd_lock);
if (ivars->xd_state == newstate)
goto out;
error = xenbus_scanf(XBT_NIL, ivars->xd_node, "state",
NULL, "%d", &currstate);
if (error)
goto out;
error = xenbus_printf(XBT_NIL, ivars->xd_node, "state",
"%d", newstate);
if (error) {
if (newstate != XenbusStateClosing) /* Avoid looping */
xenbus_dev_fatal(dev, error, "writing new state");
goto out;
}
ivars->xd_state = newstate;
wakeup(&ivars->xd_state);
out:
sx_xunlock(&ivars->xd_lock);
return (0);
case XENBUS_IVAR_NODE:
case XENBUS_IVAR_TYPE:
case XENBUS_IVAR_OTHEREND_ID:
case XENBUS_IVAR_OTHEREND_PATH:
/*
* These variables are read-only.
*/
return (EINVAL);
}
return (ENOENT);
}
SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD, NULL, "Xen");
SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xen_store_evtchn, 0, "");
SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, "");
static device_method_t xenbus_methods[] = {
/* Device interface */
DEVMETHOD(device_identify, xenbus_identify),
DEVMETHOD(device_probe, xenbus_probe),
DEVMETHOD(device_attach, xenbus_attach),
DEVMETHOD(device_detach, bus_generic_detach),
DEVMETHOD(device_shutdown, bus_generic_shutdown),
DEVMETHOD(device_suspend, xenbus_suspend),
DEVMETHOD(device_resume, xenbus_resume),
/* Bus interface */
DEVMETHOD(bus_print_child, xenbus_print_child),
DEVMETHOD(bus_read_ivar, xenbus_read_ivar),
DEVMETHOD(bus_write_ivar, xenbus_write_ivar),
{ 0, 0 }
};
static char driver_name[] = "xenbus";
static driver_t xenbus_driver = {
driver_name,
xenbus_methods,
sizeof(struct xenbus_softc),
};
devclass_t xenbus_devclass;
#ifdef XENHVM
DRIVER_MODULE(xenbus, xenpci, xenbus_driver, xenbus_devclass, 0, 0);
#else
DRIVER_MODULE(xenbus, nexus, xenbus_driver, xenbus_devclass, 0, 0);
#endif

View File

@@ -1,308 +0,0 @@
/******************************************************************************
* Talks to Xen Store to figure out what devices we have (backend half).
*
* Copyright (C) 2005 Rusty Russell, IBM Corporation
* Copyright (C) 2005 Mike Wray, Hewlett-Packard
* Copyright (C) 2005, 2006 XenSource Ltd
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#if 0
#define DPRINTK(fmt, args...) \
printf("xenbus_probe (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTK(fmt, args...) ((void)0)
#endif
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/types.h>
#include <sys/cdefs.h>
#include <sys/time.h>
#include <sys/sema.h>
#include <sys/eventhandler.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/conf.h>
#include <sys/systm.h>
#include <sys/syslog.h>
#include <sys/proc.h>
#include <sys/bus.h>
#include <sys/sx.h>
#include <machine/xen/xen-os.h>
#include <xen/hypervisor.h>
#include <machine/xen/xenbus.h>
#include <machine/stdarg.h>
#include <xen/evtchn.h>
#include <xen/xenbus/xenbus_comms.h>
#define BUG_ON PANIC_IF
#define semaphore sema
#define rw_semaphore sema
#define DEFINE_SPINLOCK(lock) struct mtx lock
#define DECLARE_MUTEX(lock) struct sema lock
#define u32 uint32_t
#define list_del(head, ent) TAILQ_REMOVE(head, ent, list)
#define simple_strtoul strtoul
#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
#define list_empty TAILQ_EMPTY
extern struct xendev_list_head xenbus_device_backend_list;
#if 0
static int xenbus_uevent_backend(struct device *dev, char **envp,
int num_envp, char *buffer, int buffer_size);
#endif
static int xenbus_probe_backend(const char *type, const char *domid);
static int read_frontend_details(struct xenbus_device *xendev)
{
return read_otherend_details(xendev, "frontend-id", "frontend");
}
/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
{
int domid, err;
const char *devid, *type, *frontend;
unsigned int typelen;
type = strchr(nodename, '/');
if (!type)
return -EINVAL;
type++;
typelen = strcspn(type, "/");
if (!typelen || type[typelen] != '/')
return -EINVAL;
devid = strrchr(nodename, '/') + 1;
err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid,
"frontend", NULL, &frontend,
NULL);
if (err)
return err;
if (strlen(frontend) == 0)
err = -ERANGE;
if (!err && !xenbus_exists(XBT_NIL, frontend, ""))
err = -ENOENT;
kfree(frontend);
if (err)
return err;
if (snprintf(bus_id, BUS_ID_SIZE,
"%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE)
return -ENOSPC;
return 0;
}
static struct xen_bus_type xenbus_backend = {
.root = "backend",
.levels = 3, /* backend/type/<frontend>/<id> */
.get_bus_id = backend_bus_id,
.probe = xenbus_probe_backend,
.bus = &xenbus_device_backend_list,
#if 0
.error = -ENODEV,
.bus = {
.name = "xen-backend",
.match = xenbus_match,
.probe = xenbus_dev_probe,
.remove = xenbus_dev_remove,
// .shutdown = xenbus_dev_shutdown,
.uevent = xenbus_uevent_backend,
},
.dev = {
.bus_id = "xen-backend",
},
#endif
};
#if 0
static int xenbus_uevent_backend(struct device *dev, char **envp,
int num_envp, char *buffer, int buffer_size)
{
struct xenbus_device *xdev;
struct xenbus_driver *drv;
int i = 0;
int length = 0;
DPRINTK("");
if (dev == NULL)
return -ENODEV;
xdev = to_xenbus_device(dev);
if (xdev == NULL)
return -ENODEV;
/* stuff we want to pass to /sbin/hotplug */
add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
"XENBUS_TYPE=%s", xdev->devicetype);
add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
"XENBUS_PATH=%s", xdev->nodename);
add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
"XENBUS_BASE_PATH=%s", xenbus_backend.root);
/* terminate, set to next free slot, shrink available space */
envp[i] = NULL;
envp = &envp[i];
num_envp -= i;
buffer = &buffer[length];
buffer_size -= length;
if (dev->driver) {
drv = to_xenbus_driver(dev->driver);
if (drv && drv->uevent)
return drv->uevent(xdev, envp, num_envp, buffer,
buffer_size);
}
return 0;
}
#endif
int xenbus_register_backend(struct xenbus_driver *drv)
{
drv->read_otherend_details = read_frontend_details;
return xenbus_register_driver_common(drv, &xenbus_backend);
}
/* backend/<typename>/<frontend-uuid>/<name> */
static int xenbus_probe_backend_unit(const char *dir,
const char *type,
const char *name)
{
char *nodename;
int err;
nodename = kasprintf("%s/%s", dir, name);
if (!nodename)
return -ENOMEM;
DPRINTK("%s\n", nodename);
err = xenbus_probe_node(&xenbus_backend, type, nodename);
kfree(nodename);
return err;
}
/* backend/<typename>/<frontend-domid> */
static int xenbus_probe_backend(const char *type, const char *domid)
{
char *nodename;
int err = 0;
char **dir;
unsigned int i, dir_n = 0;
DPRINTK("");
nodename = kasprintf("%s/%s/%s", xenbus_backend.root, type, domid);
if (!nodename)
return -ENOMEM;
dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n);
if (IS_ERR(dir)) {
kfree(nodename);
return PTR_ERR(dir);
}
for (i = 0; i < dir_n; i++) {
err = xenbus_probe_backend_unit(nodename, type, dir[i]);
if (err)
break;
}
kfree(dir);
kfree(nodename);
return err;
}
static void backend_changed(struct xenbus_watch *watch,
const char **vec, unsigned int len)
{
DPRINTK("");
dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
}
static struct xenbus_watch be_watch = {
.node = "backend",
.callback = backend_changed,
};
#if 0
void xenbus_backend_suspend(int (*fn)(struct device *, void *))
{
DPRINTK("");
if (!xenbus_backend.error)
bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
}
void xenbus_backend_resume(int (*fn)(struct device *, void *))
{
DPRINTK("");
if (!xenbus_backend.error)
bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
}
#endif
void xenbus_backend_probe_and_watch(void)
{
xenbus_probe_devices(&xenbus_backend);
register_xenbus_watch(&be_watch);
}
#if 0
void xenbus_backend_bus_register(void)
{
xenbus_backend.error = bus_register(&xenbus_backend.bus);
if (xenbus_backend.error)
log(LOG_WARNING,
"XENBUS: Error registering backend bus: %i\n",
xenbus_backend.error);
}
void xenbus_backend_device_register(void)
{
if (xenbus_backend.error)
return;
xenbus_backend.error = device_register(&xenbus_backend.dev);
if (xenbus_backend.error) {
bus_unregister(&xenbus_backend.bus);
log(LOG_WARNING,
"XENBUS: Error registering backend device: %i\n",
xenbus_backend.error);
}
}
#endif

View File

@@ -1,935 +0,0 @@
/******************************************************************************
* xenbus_xs.c
*
* This is the kernel equivalent of the "xs" library. We don't need everything
* and we use xenbus_comms for communication.
*
* Copyright (C) 2005 Rusty Russell, IBM Corporation
*
* This file may be distributed separately from the Linux kernel, or
* incorporated into other software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <sys/syslog.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/unistd.h>
#include <machine/xen/xen-os.h>
#include <xen/hypervisor.h>
#include <machine/stdarg.h>
#include <xen/xenbus/xenbusvar.h>
#include <xen/xenbus/xenbus_comms.h>
#include <xen/interface/hvm/params.h>
#include <vm/vm.h>
#include <vm/pmap.h>
static int xs_process_msg(enum xsd_sockmsg_type *type);
int xenwatch_running = 0;
int xenbus_running = 0;
int xen_store_evtchn;
struct xs_stored_msg {
TAILQ_ENTRY(xs_stored_msg) list;
struct xsd_sockmsg hdr;
union {
/* Queued replies. */
struct {
char *body;
} reply;
/* Queued watch events. */
struct {
struct xenbus_watch *handle;
char **vec;
unsigned int vec_size;
} watch;
} u;
};
struct xs_handle {
/* A list of replies. Currently only one will ever be outstanding. */
TAILQ_HEAD(xs_handle_list, xs_stored_msg) reply_list;
struct mtx reply_lock;
int reply_waitq;
/* One request at a time. */
struct sx request_mutex;
/* Protect transactions against save/restore. */
struct sx suspend_mutex;
};
static struct xs_handle xs_state;
/* List of registered watches, and a lock to protect it. */
static LIST_HEAD(watch_list_head, xenbus_watch) watches;
static struct mtx watches_lock;
/* List of pending watch callback events, and a lock to protect it. */
static TAILQ_HEAD(event_list_head, xs_stored_msg) watch_events;
static struct mtx watch_events_lock;
/*
* Details of the xenwatch callback kernel thread. The thread waits on the
* watch_events_waitq for work to do (queued on watch_events list). When it
* wakes up it acquires the xenwatch_mutex before reading the list and
* carrying out work.
*/
static pid_t xenwatch_pid;
struct sx xenwatch_mutex;
static int watch_events_waitq;
#define xsd_error_count (sizeof(xsd_errors) / sizeof(xsd_errors[0]))
static int
xs_get_error(const char *errorstring)
{
unsigned int i;
for (i = 0; i < xsd_error_count; i++) {
if (!strcmp(errorstring, xsd_errors[i].errstring))
return (xsd_errors[i].errnum);
}
log(LOG_WARNING, "XENBUS xen store gave: unknown error %s",
errorstring);
return (EINVAL);
}
extern void kdb_backtrace(void);
static int
xs_read_reply(enum xsd_sockmsg_type *type, unsigned int *len, void **result)
{
struct xs_stored_msg *msg;
char *body;
int error;
mtx_lock(&xs_state.reply_lock);
while (TAILQ_EMPTY(&xs_state.reply_list)) {
error = mtx_sleep(&xs_state.reply_waitq,
&xs_state.reply_lock,
PCATCH, "xswait", hz/10);
if (error && error != EWOULDBLOCK) {
mtx_unlock(&xs_state.reply_lock);
return (error);
}
}
msg = TAILQ_FIRST(&xs_state.reply_list);
TAILQ_REMOVE(&xs_state.reply_list, msg, list);
mtx_unlock(&xs_state.reply_lock);
*type = msg->hdr.type;
if (len)
*len = msg->hdr.len;
body = msg->u.reply.body;
free(msg, M_DEVBUF);
*result = body;
return (0);
}
#if 0
/* Emergency write. UNUSED*/
void xenbus_debug_write(const char *str, unsigned int count)
{
struct xsd_sockmsg msg = { 0 };
msg.type = XS_DEBUG;
msg.len = sizeof("print") + count + 1;
sx_xlock(&xs_state.request_mutex);
xb_write(&msg, sizeof(msg));
xb_write("print", sizeof("print"));
xb_write(str, count);
xb_write("", 1);
sx_xunlock(&xs_state.request_mutex);
}
#endif
int
xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void **result)
{
struct xsd_sockmsg req_msg = *msg;
int error;
if (req_msg.type == XS_TRANSACTION_START)
sx_slock(&xs_state.suspend_mutex);
sx_xlock(&xs_state.request_mutex);
error = xb_write(msg, sizeof(*msg) + msg->len,
&xs_state.request_mutex.lock_object);
if (error) {
msg->type = XS_ERROR;
} else {
error = xs_read_reply(&msg->type, &msg->len, result);
}
sx_xunlock(&xs_state.request_mutex);
if ((msg->type == XS_TRANSACTION_END) ||
((req_msg.type == XS_TRANSACTION_START) &&
(msg->type == XS_ERROR)))
sx_sunlock(&xs_state.suspend_mutex);
return (error);
}
/*
* Send message to xs. The reply is returned in *result and should be
* freed with free(*result, M_DEVBUF). Return zero on success or an
* error code on failure.
*/
static int
xs_talkv(struct xenbus_transaction t, enum xsd_sockmsg_type type,
const struct iovec *iovec, unsigned int num_vecs,
unsigned int *len, void **result)
{
struct xsd_sockmsg msg;
void *ret = NULL;
unsigned int i;
int error;
msg.tx_id = t.id;
msg.req_id = 0;
msg.type = type;
msg.len = 0;
for (i = 0; i < num_vecs; i++)
msg.len += iovec[i].iov_len;
sx_xlock(&xs_state.request_mutex);
error = xb_write(&msg, sizeof(msg),
&xs_state.request_mutex.lock_object);
if (error) {
sx_xunlock(&xs_state.request_mutex);
printf("xs_talkv failed %d\n", error);
return (error);
}
for (i = 0; i < num_vecs; i++) {
error = xb_write(iovec[i].iov_base, iovec[i].iov_len,
&xs_state.request_mutex.lock_object);
if (error) {
sx_xunlock(&xs_state.request_mutex);
printf("xs_talkv failed %d\n", error);
return (error);
}
}
error = xs_read_reply(&msg.type, len, &ret);
sx_xunlock(&xs_state.request_mutex);
if (error)
return (error);
if (msg.type == XS_ERROR) {
error = xs_get_error(ret);
free(ret, M_DEVBUF);
return (error);
}
#if 0
if ((xenwatch_running == 0) && (xenwatch_inline == 0)) {
xenwatch_inline = 1;
while (!TAILQ_EMPTY(&watch_events)
&& xenwatch_running == 0) {
struct xs_stored_msg *wmsg = TAILQ_FIRST(&watch_events);
TAILQ_REMOVE(&watch_events, wmsg, list);
wmsg->u.watch.handle->callback(
wmsg->u.watch.handle,
(const char **)wmsg->u.watch.vec,
wmsg->u.watch.vec_size);
free(wmsg->u.watch.vec, M_DEVBUF);
free(wmsg, M_DEVBUF);
}
xenwatch_inline = 0;
}
#endif
KASSERT(msg.type == type, ("bad xenstore message type"));
if (result)
*result = ret;
else
free(ret, M_DEVBUF);
return (0);
}
/* Simplified version of xs_talkv: single message. */
static int
xs_single(struct xenbus_transaction t, enum xsd_sockmsg_type type,
const char *string, unsigned int *len, void **result)
{
struct iovec iovec;
iovec.iov_base = (void *)(uintptr_t) string;
iovec.iov_len = strlen(string) + 1;
return (xs_talkv(t, type, &iovec, 1, len, result));
}
static unsigned int
count_strings(const char *strings, unsigned int len)
{
unsigned int num;
const char *p;
for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1)
num++;
return num;
}
/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */
static char *
join(const char *dir, const char *name)
{
char *buffer;
buffer = malloc(strlen(dir) + strlen("/") + strlen(name) + 1,
M_DEVBUF, M_WAITOK);
strcpy(buffer, dir);
if (strcmp(name, "")) {
strcat(buffer, "/");
strcat(buffer, name);
}
return (buffer);
}
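/*
 * Unpack a NUL-separated string block into an array of pointers.  The
 * result is a single allocation: *num char pointers followed by a copy
 * of the string data they point into, so the caller can release
 * everything with one free().
 */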
static char **
split(char *strings, unsigned int len, unsigned int *num)
{
char *p, **ret;
/* Count the strings. */
*num = count_strings(strings, len) + 1;
/* Transfer to one big alloc for easy freeing. */
ret = malloc(*num * sizeof(char *) + len, M_DEVBUF, M_WAITOK);
memcpy(&ret[*num], strings, len);
free(strings, M_DEVBUF);
strings = (char *)&ret[*num];
for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1)
ret[(*num)++] = p;
ret[*num] = strings + len;
return ret;
}
/*
* Return the contents of a directory in *result which should be freed
* with free(*result, M_DEVBUF).
*/
int
xenbus_directory(struct xenbus_transaction t, const char *dir,
const char *node, unsigned int *num, char ***result)
{
char *strings, *path;
unsigned int len = 0;
int error;
path = join(dir, node);
error = xs_single(t, XS_DIRECTORY, path, &len, (void **) &strings);
free(path, M_DEVBUF);
if (error)
return (error);
*result = split(strings, len, num);
return (0);
}
/*
* Check if a path exists. Return 1 if it does.
*/
int
xenbus_exists(struct xenbus_transaction t, const char *dir, const char *node)
{
char **d;
int error, dir_n;
error = xenbus_directory(t, dir, node, &dir_n, &d);
if (error)
return (0);
free(d, M_DEVBUF);
return (1);
}
/*
* Get the value of a single file. Returns the contents in *result
* which should be freed with free(*result, M_DEVBUF) after use.
* The length of the value in bytes is returned in *len.
*/
int
xenbus_read(struct xenbus_transaction t, const char *dir, const char *node,
unsigned int *len, void **result)
{
char *path;
void *ret;
int error;
path = join(dir, node);
error = xs_single(t, XS_READ, path, len, &ret);
free(path, M_DEVBUF);
if (error)
return (error);
*result = ret;
return (0);
}
/*
* Write the value of a single file. Returns error on failure.
*/
int
xenbus_write(struct xenbus_transaction t, const char *dir, const char *node,
const char *string)
{
char *path;
struct iovec iovec[2];
int error;
path = join(dir, node);
iovec[0].iov_base = (void *)(uintptr_t) path;
iovec[0].iov_len = strlen(path) + 1;
iovec[1].iov_base = (void *)(uintptr_t) string;
iovec[1].iov_len = strlen(string);
error = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
free(path, M_DEVBUF);
return (error);
}
/*
* Create a new directory.
*/
int
xenbus_mkdir(struct xenbus_transaction t, const char *dir, const char *node)
{
char *path;
int ret;
path = join(dir, node);
ret = xs_single(t, XS_MKDIR, path, NULL, NULL);
free(path, M_DEVBUF);
return (ret);
}
/*
* Destroy a file or directory (directories must be empty).
*/
int
xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node)
{
char *path;
int ret;
path = join(dir, node);
ret = xs_single(t, XS_RM, path, NULL, NULL);
free(path, M_DEVBUF);
return (ret);
}
/*
* Start a transaction: changes by others will not be seen during this
* transaction, and changes will not be visible to others until end.
*/
int
xenbus_transaction_start(struct xenbus_transaction *t)
{
char *id_str;
int error;
sx_slock(&xs_state.suspend_mutex);
error = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL,
(void **) &id_str);
if (error) {
sx_sunlock(&xs_state.suspend_mutex);
return (error);
}
t->id = strtoul(id_str, NULL, 0);
free(id_str, M_DEVBUF);
return (0);
}
/*
* End a transaction. If abort is true, the transaction is discarded
* instead of committed.
*/
int xenbus_transaction_end(struct xenbus_transaction t, int abort)
{
char abortstr[2];
int error;
if (abort)
strcpy(abortstr, "F");
else
strcpy(abortstr, "T");
error = xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL);
sx_sunlock(&xs_state.suspend_mutex);
return (error);
}
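/*
 * The canonical usage pattern (sketch) retries the whole body when the
 * commit fails with EAGAIN:
 *
 *	again:
 *		xenbus_transaction_start(&xbt);
 *		... reads and writes against xbt ...
 *		if (xenbus_transaction_end(xbt, 0) == EAGAIN)
 *			goto again;
 */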
/* Single read and scanf: returns zero or errno. */
int
xenbus_scanf(struct xenbus_transaction t,
const char *dir, const char *node, int *scancountp, const char *fmt, ...)
{
va_list ap;
int error, ns;
char *val;
error = xenbus_read(t, dir, node, NULL, (void **) &val);
if (error)
return (error);
va_start(ap, fmt);
ns = vsscanf(val, fmt, ap);
va_end(ap);
free(val, M_DEVBUF);
/* Distinctive errno. */
if (ns == 0)
return (ERANGE);
if (scancountp)
*scancountp = ns;
return (0);
}
/* Single printf and write: returns zero or errno. */
int
xenbus_printf(struct xenbus_transaction t,
const char *dir, const char *node, const char *fmt, ...)
{
va_list ap;
int error, ret;
#define PRINTF_BUFFER_SIZE 4096
char *printf_buffer;
printf_buffer = malloc(PRINTF_BUFFER_SIZE, M_DEVBUF, M_WAITOK);
va_start(ap, fmt);
ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
va_end(ap);
KASSERT(ret <= PRINTF_BUFFER_SIZE-1, ("xenbus_printf: message too large"));
error = xenbus_write(t, dir, node, printf_buffer);
free(printf_buffer, M_DEVBUF);
return (error);
}
/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
int
xenbus_gather(struct xenbus_transaction t, const char *dir, ...)
{
va_list ap;
const char *name;
int error, i;
for (i = 0; i < 10000; i++)
HYPERVISOR_yield();
va_start(ap, dir);
error = 0;
while (error == 0 && (name = va_arg(ap, char *)) != NULL) {
const char *fmt = va_arg(ap, char *);
void *result = va_arg(ap, void *);
char *p;
error = xenbus_read(t, dir, name, NULL, (void **) &p);
if (error)
break;
if (fmt) {
if (sscanf(p, fmt, result) == 0)
error = EINVAL;
free(p, M_DEVBUF);
} else
*(char **)result = p;
}
va_end(ap);
return (error);
}
static int
xs_watch(const char *path, const char *token)
{
struct iovec iov[2];
iov[0].iov_base = (void *)(uintptr_t) path;
iov[0].iov_len = strlen(path) + 1;
iov[1].iov_base = (void *)(uintptr_t) token;
iov[1].iov_len = strlen(token) + 1;
return (xs_talkv(XBT_NIL, XS_WATCH, iov, 2, NULL, NULL));
}
static int
xs_unwatch(const char *path, const char *token)
{
struct iovec iov[2];
iov[0].iov_base = (void *)(uintptr_t) path;
iov[0].iov_len = strlen(path) + 1;
iov[1].iov_base = (void *)(uintptr_t) token;
iov[1].iov_len = strlen(token) + 1;
return (xs_talkv(XBT_NIL, XS_UNWATCH, iov, 2, NULL, NULL));
}
static struct xenbus_watch *
find_watch(const char *token)
{
struct xenbus_watch *i, *cmp;
cmp = (void *)strtoul(token, NULL, 16);
LIST_FOREACH(i, &watches, list)
if (i == cmp)
return (i);
return (NULL);
}
/* Register callback to watch this node. */
int
register_xenbus_watch(struct xenbus_watch *watch)
{
/* Pointer in ascii is the token. */
char token[sizeof(watch) * 2 + 1];
int error;
sprintf(token, "%lX", (long)watch);
sx_slock(&xs_state.suspend_mutex);
mtx_lock(&watches_lock);
KASSERT(find_watch(token) == NULL, ("watch already registered"));
LIST_INSERT_HEAD(&watches, watch, list);
mtx_unlock(&watches_lock);
error = xs_watch(watch->node, token);
/* Ignore errors due to multiple registration. */
if (error == EEXIST) {
mtx_lock(&watches_lock);
LIST_REMOVE(watch, list);
mtx_unlock(&watches_lock);
}
sx_sunlock(&xs_state.suspend_mutex);
return (error);
}
void
unregister_xenbus_watch(struct xenbus_watch *watch)
{
struct xs_stored_msg *msg, *tmp;
char token[sizeof(watch) * 2 + 1];
int error;
sprintf(token, "%lX", (long)watch);
sx_slock(&xs_state.suspend_mutex);
mtx_lock(&watches_lock);
KASSERT(find_watch(token), ("watch not registered"));
LIST_REMOVE(watch, list);
mtx_unlock(&watches_lock);
error = xs_unwatch(watch->node, token);
if (error)
log(LOG_WARNING, "XENBUS Failed to release watch %s: %i\n",
watch->node, error);
sx_sunlock(&xs_state.suspend_mutex);
/* Cancel pending watch events. */
mtx_lock(&watch_events_lock);
TAILQ_FOREACH_SAFE(msg, &watch_events, list, tmp) {
if (msg->u.watch.handle != watch)
continue;
TAILQ_REMOVE(&watch_events, msg, list);
free(msg->u.watch.vec, M_DEVBUF);
free(msg, M_DEVBUF);
}
mtx_unlock(&watch_events_lock);
/* Flush any currently-executing callback, unless we are it. :-) */
if (curproc->p_pid != xenwatch_pid) {
sx_xlock(&xenwatch_mutex);
sx_xunlock(&xenwatch_mutex);
}
}
void
xs_suspend(void)
{
sx_xlock(&xs_state.suspend_mutex);
sx_xlock(&xs_state.request_mutex);
}
void
xs_resume(void)
{
struct xenbus_watch *watch;
char token[sizeof(watch) * 2 + 1];
sx_xunlock(&xs_state.request_mutex);
/* No need for watches_lock: the suspend_mutex is sufficient. */
LIST_FOREACH(watch, &watches, list) {
sprintf(token, "%lX", (long)watch);
xs_watch(watch->node, token);
}
sx_xunlock(&xs_state.suspend_mutex);
}
static void
xenwatch_thread(void *unused)
{
struct xs_stored_msg *msg;
for (;;) {
mtx_lock(&watch_events_lock);
while (TAILQ_EMPTY(&watch_events))
mtx_sleep(&watch_events_waitq,
&watch_events_lock,
PWAIT | PCATCH, "waitev", hz/10);
mtx_unlock(&watch_events_lock);
sx_xlock(&xenwatch_mutex);
mtx_lock(&watch_events_lock);
msg = TAILQ_FIRST(&watch_events);
if (msg)
TAILQ_REMOVE(&watch_events, msg, list);
mtx_unlock(&watch_events_lock);
if (msg != NULL) {
/*
* XXX There are messages coming in with a NULL callback.
* XXX This deserves further investigation; the workaround
* XXX here simply prevents the kernel from panic'ing
* XXX on startup.
*/
if (msg->u.watch.handle->callback != NULL)
msg->u.watch.handle->callback(
msg->u.watch.handle,
(const char **)msg->u.watch.vec,
msg->u.watch.vec_size);
free(msg->u.watch.vec, M_DEVBUF);
free(msg, M_DEVBUF);
}
sx_xunlock(&xenwatch_mutex);
}
}
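/*
 * Read one message from the xenstore ring.  Watch events are matched to
 * their registered watch and queued for the xenwatch thread; everything
 * else is treated as a reply to the outstanding request.
 */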
static int
xs_process_msg(enum xsd_sockmsg_type *type)
{
struct xs_stored_msg *msg;
char *body;
int error;
msg = malloc(sizeof(*msg), M_DEVBUF, M_WAITOK);
mtx_lock(&xs_state.reply_lock);
error = xb_read(&msg->hdr, sizeof(msg->hdr),
&xs_state.reply_lock.lock_object);
mtx_unlock(&xs_state.reply_lock);
if (error) {
free(msg, M_DEVBUF);
return (error);
}
body = malloc(msg->hdr.len + 1, M_DEVBUF, M_WAITOK);
mtx_lock(&xs_state.reply_lock);
error = xb_read(body, msg->hdr.len,
&xs_state.reply_lock.lock_object);
mtx_unlock(&xs_state.reply_lock);
if (error) {
free(body, M_DEVBUF);
free(msg, M_DEVBUF);
return (error);
}
body[msg->hdr.len] = '\0';
*type = msg->hdr.type;
if (msg->hdr.type == XS_WATCH_EVENT) {
msg->u.watch.vec = split(body, msg->hdr.len,
&msg->u.watch.vec_size);
mtx_lock(&watches_lock);
msg->u.watch.handle = find_watch(
msg->u.watch.vec[XS_WATCH_TOKEN]);
if (msg->u.watch.handle != NULL) {
mtx_lock(&watch_events_lock);
TAILQ_INSERT_TAIL(&watch_events, msg, list);
wakeup(&watch_events_waitq);
mtx_unlock(&watch_events_lock);
} else {
free(msg->u.watch.vec, M_DEVBUF);
free(msg, M_DEVBUF);
}
mtx_unlock(&watches_lock);
} else {
msg->u.reply.body = body;
mtx_lock(&xs_state.reply_lock);
TAILQ_INSERT_TAIL(&xs_state.reply_list, msg, list);
wakeup(&xs_state.reply_waitq);
mtx_unlock(&xs_state.reply_lock);
}
return 0;
}
static void
xenbus_thread(void *unused)
{
int error;
enum xsd_sockmsg_type type;
xenbus_running = 1;
for (;;) {
error = xs_process_msg(&type);
if (error)
printf("XENBUS error %d while reading message\n",
error);
}
}
#ifdef XENHVM
static unsigned long xen_store_mfn;
char *xen_store;
static inline unsigned long
hvm_get_parameter(int index)
{
struct xen_hvm_param xhv;
int error;
xhv.domid = DOMID_SELF;
xhv.index = index;
error = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
if (error) {
printf("hvm_get_parameter: failed to get %d, error %d\n",
index, error);
return (0);
}
return (xhv.value);
}
#endif
int
xs_init(void)
{
int error;
struct proc *p;
#ifdef XENHVM
xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN);
xen_store = pmap_mapdev(xen_store_mfn * PAGE_SIZE, PAGE_SIZE);
#else
xen_store_evtchn = xen_start_info->store_evtchn;
#endif
TAILQ_INIT(&xs_state.reply_list);
TAILQ_INIT(&watch_events);
sx_init(&xenwatch_mutex, "xenwatch");
mtx_init(&xs_state.reply_lock, "state reply", NULL, MTX_DEF);
sx_init(&xs_state.request_mutex, "xenstore request");
sx_init(&xs_state.suspend_mutex, "xenstore suspend");
#if 0
mtx_init(&xs_state.suspend_mutex, "xenstore suspend", NULL, MTX_DEF);
sema_init(&xs_state.request_mutex, 1, "xenstore request");
sema_init(&xenwatch_mutex, 1, "xenwatch");
#endif
mtx_init(&watches_lock, "watches", NULL, MTX_DEF);
mtx_init(&watch_events_lock, "watch events", NULL, MTX_DEF);
/* Initialize the shared memory rings to talk to xenstored */
error = xb_init_comms();
if (error)
return (error);
xenwatch_running = 1;
error = kproc_create(xenwatch_thread, NULL, &p,
RFHIGHPID, 0, "xenwatch");
if (error)
return (error);
xenwatch_pid = p->p_pid;
error = kproc_create(xenbus_thread, NULL, NULL,
RFHIGHPID, 0, "xenbus");
return (error);
}

sys/xen/xenbus/xenbusb.c Normal file

@@ -0,0 +1,878 @@
/******************************************************************************
* Copyright (C) 2010 Spectra Logic Corporation
* Copyright (C) 2008 Doug Rabson
* Copyright (C) 2005 Rusty Russell, IBM Corporation
* Copyright (C) 2005 Mike Wray, Hewlett-Packard
* Copyright (C) 2005 XenSource Ltd
*
* This file may be distributed separately from the Linux kernel, or
* incorporated into other software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/**
* \file xenbusb.c
*
* \brief Shared support functions for managing the NewBus busses that contain
* Xen front and back end device instances.
*
* The NewBus implementation of XenBus attaches a xenbusb_front and xenbusb_back
* child bus to the xenstore device. This strategy allows the small differences
* in the handling of XenBus operations for front and back devices to be handled
* as overrides in xenbusb_front/back.c. Front and back specific device
* classes are also provided so device drivers can register for the devices they
* can handle without the need to filter within their probe routines. The
* net result is a device hierarchy that might look like this:
*
* xenstore0/
* xenbusb_front0/
* xn0
* xbd0
* xbd1
* xenbusb_back0/
* xbbd0
* xnb0
* xnb1
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>
#include <machine/xen/xen-os.h>
#include <machine/stdarg.h>
#include <xen/gnttab.h>
#include <xen/xenstore/xenstorevar.h>
#include <xen/xenbus/xenbusb.h>
#include <xen/xenbus/xenbusvar.h>
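/*
 * Illustrative sketch only: how a front-end device driver might bind
 * to the xenbusb_front bus named in the file comment above, so that
 * it is probed only for front-end devices.  The "xf" driver, its
 * softc, and its methods are hypothetical stand-ins, not drivers in
 * this change.
 */
#if 0
struct xf_softc {
	device_t xf_dev;
};

static int
xf_probe(device_t dev)
{
	/* Match on the XenBus device type, e.g. "vif". */
	if (strcmp(xenbus_get_type(dev), "vif") == 0)
		return (BUS_PROBE_DEFAULT);
	return (ENXIO);
}

static int
xf_attach(device_t dev)
{
	return (0);
}

static device_method_t xf_methods[] = {
	DEVMETHOD(device_probe,		xf_probe),
	DEVMETHOD(device_attach,	xf_attach),
	{ 0, 0 }
};

static driver_t xf_driver = {
	"xf", xf_methods, sizeof(struct xf_softc)
};
static devclass_t xf_devclass;

/* Attaching to "xenbusb_front" scopes probing to the front-end bus. */
DRIVER_MODULE(xf, xenbusb_front, xf_driver, xf_devclass, 0, 0);
#endif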
/*------------------------- Private Functions --------------------------------*/
/**
* \brief Deallocate XenBus device instance variables.
*
* \param ivars The instance variable block to free.
*/
static void
xenbusb_free_child_ivars(struct xenbus_device_ivars *ivars)
{
if (ivars->xd_otherend_watch.node != NULL) {
xs_unregister_watch(&ivars->xd_otherend_watch);
free(ivars->xd_otherend_watch.node, M_XENBUS);
ivars->xd_otherend_watch.node = NULL;
}
if (ivars->xd_node != NULL) {
free(ivars->xd_node, M_XENBUS);
ivars->xd_node = NULL;
}
if (ivars->xd_type != NULL) {
free(ivars->xd_type, M_XENBUS);
ivars->xd_type = NULL;
}
if (ivars->xd_otherend_path != NULL) {
free(ivars->xd_otherend_path, M_XENBUS);
ivars->xd_otherend_path = NULL;
}
free(ivars, M_XENBUS);
}
/**
* XenBus watch callback registered against the "state" XenStore
* node of the other-end of a split device connection.
*
* This callback is invoked whenever the state of a device instance's
* peer changes.
*
* \param watch The xs_watch object used to register this callback
* function.
* \param vec An array of pointers to NUL terminated strings containing
* watch event data. The vector should be indexed via the
* xs_watch_type enum in xs_wire.h.
* \param vec_size The number of elements in vec.
*/
static void
xenbusb_otherend_changed(struct xs_watch *watch, const char **vec,
unsigned int vec_size __unused)
{
struct xenbus_device_ivars *ivars;
device_t dev;
enum xenbus_state newstate;
ivars = (struct xenbus_device_ivars *) watch;
dev = ivars->xd_dev;
if (!ivars->xd_otherend_path
|| strncmp(ivars->xd_otherend_path, vec[XS_WATCH_PATH],
strlen(ivars->xd_otherend_path)))
return;
newstate = xenbus_read_driver_state(ivars->xd_otherend_path);
XENBUS_OTHEREND_CHANGED(dev, newstate);
}
/**
* Search our internal record of configured devices (not the XenStore)
* to determine if the XenBus device indicated by \a node is known to
* the system.
*
* \param dev The XenBus bus instance to search for device children.
* \param node The XenStore node path for the device to find.
*
* \return The device_t of the found device if any, or NULL.
*
* \note device_t is a pointer type, so it can be compared against
* NULL for validity.
*/
static device_t
xenbusb_device_exists(device_t dev, const char *node)
{
device_t *kids;
device_t result;
struct xenbus_device_ivars *ivars;
int i, count;
if (device_get_children(dev, &kids, &count))
return (NULL);
result = NULL;
for (i = 0; i < count; i++) {
ivars = device_get_ivars(kids[i]);
if (!strcmp(ivars->xd_node, node)) {
result = kids[i];
break;
}
}
free(kids, M_TEMP);
return (result);
}
static void
xenbusb_delete_child(device_t dev, device_t child)
{
struct xenbus_device_ivars *ivars;
ivars = device_get_ivars(child);
/*
* We no longer care about the otherend of the
* connection. Cancel the watch now so that we
* don't try to handle an event for a partially
* detached child.
*/
if (ivars->xd_otherend_watch.node != NULL)
xs_unregister_watch(&ivars->xd_otherend_watch);
device_delete_child(dev, child);
xenbusb_free_child_ivars(ivars);
}
/**
* \param dev The NewBus device representing this XenBus bus.
 * \param child The NewBus device representing a child of dev's XenBus bus.
*/
static void
xenbusb_verify_device(device_t dev, device_t child)
{
if (xs_exists(XST_NIL, xenbus_get_node(child), "") == 0) {
/*
* Device tree has been removed from Xenbus.
* Tear down the device.
*/
xenbusb_delete_child(dev, child);
}
}
/**
* \brief Enumerate the devices on a XenBus bus and register them with
* the NewBus device tree.
*
* xenbusb_enumerate_bus() will create entries (in state DS_NOTPRESENT)
* for nodes that appear in the XenStore, but will not invoke probe/attach
* operations on drivers. Probe/Attach processing must be separately
* performed via an invocation of xenbusb_probe_children(). This is usually
* done via the xbs_probe_children task.
*
* \param xbs XenBus Bus device softc of the owner of the bus to enumerate.
*
* \return On success, 0. Otherwise an errno value indicating the
* type of failure.
*/
static int
xenbusb_enumerate_bus(struct xenbusb_softc *xbs)
{
const char **types;
u_int type_idx;
u_int type_count;
int error;
error = xs_directory(XST_NIL, xbs->xbs_node, "", &type_count, &types);
if (error)
return (error);
for (type_idx = 0; type_idx < type_count; type_idx++)
XENBUSB_ENUMERATE_TYPE(xbs->xbs_dev, types[type_idx]);
free(types, M_XENSTORE);
return (0);
}
/**
 * Handler for all generic XenBus device sysctl nodes.
*/
static int
xenbusb_device_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
device_t dev;
const char *value;
dev = (device_t)arg1;
switch (arg2) {
case XENBUS_IVAR_NODE:
value = xenbus_get_node(dev);
break;
case XENBUS_IVAR_TYPE:
value = xenbus_get_type(dev);
break;
case XENBUS_IVAR_STATE:
value = xenbus_strstate(xenbus_get_state(dev));
break;
case XENBUS_IVAR_OTHEREND_ID:
return (sysctl_handle_int(oidp, NULL,
xenbus_get_otherend_id(dev),
req));
/* NOTREACHED */
case XENBUS_IVAR_OTHEREND_PATH:
value = xenbus_get_otherend_path(dev);
break;
default:
return (EINVAL);
}
return (SYSCTL_OUT(req, value, strlen(value)));
}
/**
 * Create read-only sysctl nodes for xenbusb device ivar data.
*
* \param dev The XenBus device instance to register with sysctl.
*/
static void
xenbusb_device_sysctl_init(device_t dev)
{
struct sysctl_ctx_list *ctx;
struct sysctl_oid *tree;
ctx = device_get_sysctl_ctx(dev);
tree = device_get_sysctl_tree(dev);
SYSCTL_ADD_PROC(ctx,
SYSCTL_CHILDREN(tree),
OID_AUTO,
"xenstore_path",
CTLFLAG_RD,
dev,
XENBUS_IVAR_NODE,
xenbusb_device_sysctl_handler,
"A",
"XenStore path to device");
SYSCTL_ADD_PROC(ctx,
SYSCTL_CHILDREN(tree),
OID_AUTO,
"xenbus_dev_type",
CTLFLAG_RD,
dev,
XENBUS_IVAR_TYPE,
xenbusb_device_sysctl_handler,
"A",
"XenBus device type");
SYSCTL_ADD_PROC(ctx,
SYSCTL_CHILDREN(tree),
OID_AUTO,
"xenbus_connection_state",
CTLFLAG_RD,
dev,
XENBUS_IVAR_STATE,
xenbusb_device_sysctl_handler,
"A",
"XenBus state of peer connection");
SYSCTL_ADD_PROC(ctx,
SYSCTL_CHILDREN(tree),
OID_AUTO,
"xenbus_peer_domid",
CTLFLAG_RD,
dev,
XENBUS_IVAR_OTHEREND_ID,
xenbusb_device_sysctl_handler,
"I",
"Xen domain ID of peer");
SYSCTL_ADD_PROC(ctx,
SYSCTL_CHILDREN(tree),
OID_AUTO,
"xenstore_peer_path",
CTLFLAG_RD,
dev,
XENBUS_IVAR_OTHEREND_PATH,
xenbusb_device_sysctl_handler,
"A",
"XenStore path to peer device");
}
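/*
 * Illustrative user-space sketch (not part of this file): reading one
 * of the nodes registered above with sysctlbyname(3).  The instance
 * name "xn0" and the resulting OID "dev.xn.0.xenstore_path" are
 * assumptions for illustration.  Note the handler above emits the
 * string without a NUL terminator, so the caller adds one.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	char path[128];
	size_t len = sizeof(path) - 1;

	if (sysctlbyname("dev.xn.0.xenstore_path", path, &len,
	    NULL, 0) != 0) {
		perror("sysctlbyname");
		return (1);
	}
	path[len] = '\0';	/* SYSCTL_OUT() above omits the NUL. */
	printf("xn0 lives at %s\n", path);
	return (0);
}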
/**
 * \brief Verify the existence of attached device instances and perform
* probe/attach processing for newly arrived devices.
*
* \param dev The NewBus device representing this XenBus bus.
*
* \return On success, 0. Otherwise an errno value indicating the
* type of failure.
*/
static int
xenbusb_probe_children(device_t dev)
{
device_t *kids;
struct xenbus_device_ivars *ivars;
int i, count;
if (device_get_children(dev, &kids, &count) == 0) {
for (i = 0; i < count; i++) {
if (device_get_state(kids[i]) != DS_NOTPRESENT) {
/*
* We already know about this one.
* Make sure it's still here.
*/
xenbusb_verify_device(dev, kids[i]);
continue;
}
if (device_probe_and_attach(kids[i])) {
/*
* Transition device to the closed state
* so the world knows that attachment will
* not occur.
*/
xenbus_set_state(kids[i], XenbusStateClosed);
/*
* Remove our record of this device.
* So long as it remains in the closed
* state in the XenStore, we will not find
* it again. The state will only change
* if the control domain actively reconfigures
* this device.
*/
xenbusb_delete_child(dev, kids[i]);
continue;
}
/*
* Augment default newbus provided dynamic sysctl
* variables with the standard ivar contents of
* XenBus devices.
*/
xenbusb_device_sysctl_init(kids[i]);
/*
* Now that we have a driver managing this device
* that can receive otherend state change events,
* hook up a watch for them.
*/
ivars = device_get_ivars(kids[i]);
xs_register_watch(&ivars->xd_otherend_watch);
}
free(kids, M_TEMP);
}
return (0);
}
/**
* \brief Task callback function to perform XenBus probe operations
* from a known safe context.
*
* \param arg The NewBus device_t representing the bus instance to
* on which to perform probe processing.
* \param pending The number of times this task was queued before it could
* be run.
*/
static void
xenbusb_probe_children_cb(void *arg, int pending __unused)
{
device_t dev = (device_t)arg;
/*
* Hold Giant until the Giant free newbus changes are committed.
*/
mtx_lock(&Giant);
xenbusb_probe_children(dev);
mtx_unlock(&Giant);
}
/**
* \brief XenStore watch callback for the root node of the XenStore
* subtree representing a XenBus.
*
* This callback performs, or delegates to the xbs_probe_children task,
 * all processing necessary to handle dynamic device arrival and departure
* events from a XenBus.
*
* \param watch The XenStore watch object associated with this callback.
* \param vec The XenStore watch event data.
* \param len The number of fields in the event data stream.
*/
static void
xenbusb_devices_changed(struct xs_watch *watch, const char **vec,
unsigned int len)
{
struct xenbusb_softc *xbs;
device_t dev;
char *node;
char *bus;
char *type;
char *id;
char *p;
u_int component;
xbs = (struct xenbusb_softc *)watch;
dev = xbs->xbs_dev;
if (len <= XS_WATCH_PATH) {
device_printf(dev, "xenbusb_devices_changed: "
"Short Event Data.\n");
return;
}
node = strdup(vec[XS_WATCH_PATH], M_XENBUS);
p = strchr(node, '/');
if (p == NULL)
goto out;
bus = node;
*p = 0;
type = p + 1;
p = strchr(type, '/');
if (p == NULL)
goto out;
*p++ = 0;
/*
* Extract the device ID. A device ID has one or more path
* components separated by the '/' character.
*
* e.g. "<frontend vm id>/<frontend dev id>" for backend devices.
*/
id = p;
for (component = 0; component < xbs->xbs_id_components; component++) {
p = strchr(p, '/');
if (p == NULL)
break;
p++;
}
if (p != NULL)
*p = 0;
if (*id != 0 && component >= xbs->xbs_id_components - 1) {
xenbusb_add_device(xbs->xbs_dev, type, id);
taskqueue_enqueue(taskqueue_thread, &xbs->xbs_probe_children);
}
out:
free(node, M_XENBUS);
}
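/*
 * Illustrative user-space sketch (not part of this file): the path
 * parsing above applied to a back-end event.  For the XenStore path
 * "backend/vbd/1/51712" with xbs_id_components == 2 this yields bus
 * "backend", type "vbd", and id "1/51712"; the ID keeps its internal
 * '/' because back-end IDs span two path components.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
	char *node = strdup("backend/vbd/1/51712");
	unsigned int id_components = 2, component;
	char *bus, *type, *id, *p;

	bus = node;
	p = strchr(node, '/');		/* Known present in this input. */
	*p = '\0';
	type = p + 1;
	p = strchr(type, '/');
	*p++ = '\0';
	id = p;
	for (component = 0; component < id_components; component++) {
		p = strchr(p, '/');
		if (p == NULL)
			break;
		p++;
	}
	if (p != NULL)
		*p = '\0';
	printf("bus=%s type=%s id=%s\n", bus, type, id);
	free(node);
	return (0);
}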
/**
 * \brief Interrupt configuration hook callback associated with xbs_attach_ch.
*
* Since interrupts are always functional at the time of XenBus configuration,
* there is nothing to be done when the callback occurs. This hook is only
* registered to hold up boot processing while XenBus devices come online.
*
* \param arg Unused configuration hook callback argument.
*/
static void
xenbusb_nop_confighook_cb(void *arg __unused)
{
}
/**
* \brief Decrement the number of XenBus child devices in the
 * connecting state by one and release the xbs_attach_ch
* interrupt configuration hook if the connecting count
* drops to zero.
*
* \param xbs XenBus Bus device softc of the owner of the bus to enumerate.
*/
static void
xenbusb_release_confighook(struct xenbusb_softc *xbs)
{
mtx_lock(&xbs->xbs_lock);
KASSERT(xbs->xbs_connecting_children > 0,
("Connecting device count error\n"));
xbs->xbs_connecting_children--;
if (xbs->xbs_connecting_children == 0
&& (xbs->xbs_flags & XBS_ATTACH_CH_ACTIVE) != 0) {
xbs->xbs_flags &= ~XBS_ATTACH_CH_ACTIVE;
mtx_unlock(&xbs->xbs_lock);
config_intrhook_disestablish(&xbs->xbs_attach_ch);
} else {
mtx_unlock(&xbs->xbs_lock);
}
}
/*--------------------------- Public Functions -------------------------------*/
/*--------- API comments for these methods can be found in xenbusb.h ---------*/
void
xenbusb_identify(driver_t *driver __unused, device_t parent)
{
/*
* A single instance of each bus type for which we have a driver
* is always present in a system operating under Xen.
*/
BUS_ADD_CHILD(parent, 0, driver->name, 0);
}
int
xenbusb_add_device(device_t dev, const char *type, const char *id)
{
struct xenbusb_softc *xbs;
struct sbuf *devpath_sbuf;
char *devpath;
struct xenbus_device_ivars *ivars;
int error;
xbs = device_get_softc(dev);
devpath_sbuf = sbuf_new_auto();
sbuf_printf(devpath_sbuf, "%s/%s/%s", xbs->xbs_node, type, id);
sbuf_finish(devpath_sbuf);
devpath = sbuf_data(devpath_sbuf);
ivars = malloc(sizeof(*ivars), M_XENBUS, M_ZERO|M_WAITOK);
error = ENXIO;
if (xs_exists(XST_NIL, devpath, "") != 0) {
device_t child;
enum xenbus_state state;
char *statepath;
child = xenbusb_device_exists(dev, devpath);
if (child != NULL) {
/*
* We are already tracking this node
*/
error = 0;
goto out;
}
state = xenbus_read_driver_state(devpath);
if (state != XenbusStateInitialising) {
/*
* Device is not new, so ignore it. This can
* happen if a device is going away after
* switching to Closed.
*/
printf("xenbusb_add_device: Device %s ignored. "
"State %d\n", devpath, state);
error = 0;
goto out;
}
sx_init(&ivars->xd_lock, "xdlock");
ivars->xd_flags = XDF_CONNECTING;
ivars->xd_node = strdup(devpath, M_XENBUS);
ivars->xd_type = strdup(type, M_XENBUS);
ivars->xd_state = XenbusStateInitialising;
error = XENBUSB_GET_OTHEREND_NODE(dev, ivars);
if (error) {
printf("xenbus_update_device: %s no otherend id\n",
devpath);
goto out;
}
statepath = malloc(strlen(ivars->xd_otherend_path)
+ strlen("/state") + 1, M_XENBUS, M_WAITOK);
sprintf(statepath, "%s/state", ivars->xd_otherend_path);
ivars->xd_otherend_watch.node = statepath;
ivars->xd_otherend_watch.callback = xenbusb_otherend_changed;
mtx_lock(&xbs->xbs_lock);
xbs->xbs_connecting_children++;
mtx_unlock(&xbs->xbs_lock);
child = device_add_child(dev, NULL, -1);
ivars->xd_dev = child;
device_set_ivars(child, ivars);
}
out:
sbuf_delete(devpath_sbuf);
if (error != 0)
xenbusb_free_child_ivars(ivars);
return (error);
}
int
xenbusb_attach(device_t dev, char *bus_node, u_int id_components)
{
struct xenbusb_softc *xbs;
xbs = device_get_softc(dev);
mtx_init(&xbs->xbs_lock, "xenbusb softc lock", NULL, MTX_DEF);
xbs->xbs_node = bus_node;
xbs->xbs_id_components = id_components;
xbs->xbs_dev = dev;
/*
* Since XenBus busses are attached to the XenStore, and
* the XenStore does not probe children until after interrupt
* services are available, this config hook is used solely
* to ensure that the remainder of the boot process (e.g.
* mount root) is deferred until child devices are adequately
* probed. We unblock the boot process as soon as the
* connecting child count in our softc goes to 0.
*/
xbs->xbs_attach_ch.ich_func = xenbusb_nop_confighook_cb;
xbs->xbs_attach_ch.ich_arg = dev;
config_intrhook_establish(&xbs->xbs_attach_ch);
xbs->xbs_flags |= XBS_ATTACH_CH_ACTIVE;
xbs->xbs_connecting_children = 1;
/*
* The subtree for this bus type may not yet exist
* causing initial enumeration to fail. We still
* want to return success from our attach though
* so that we are ready to handle devices for this
* bus when they are dynamically attached to us
* by a Xen management action.
*/
(void)xenbusb_enumerate_bus(xbs);
xenbusb_probe_children(dev);
xbs->xbs_device_watch.node = bus_node;
xbs->xbs_device_watch.callback = xenbusb_devices_changed;
TASK_INIT(&xbs->xbs_probe_children, 0, xenbusb_probe_children_cb, dev);
xs_register_watch(&xbs->xbs_device_watch);
xenbusb_release_confighook(xbs);
return (0);
}
int
xenbusb_resume(device_t dev)
{
device_t *kids;
struct xenbus_device_ivars *ivars;
int i, count, error;
char *statepath;
/*
* We must re-examine each device and find the new path for
* its backend.
*/
if (device_get_children(dev, &kids, &count) == 0) {
for (i = 0; i < count; i++) {
if (device_get_state(kids[i]) == DS_NOTPRESENT)
continue;
ivars = device_get_ivars(kids[i]);
xs_unregister_watch(&ivars->xd_otherend_watch);
ivars->xd_state = XenbusStateInitialising;
/*
* Find the new backend details and
* re-register our watch.
*/
error = XENBUSB_GET_OTHEREND_NODE(dev, ivars);
if (error)
return (error);
DEVICE_RESUME(kids[i]);
statepath = malloc(strlen(ivars->xd_otherend_path)
+ strlen("/state") + 1, M_XENBUS, M_WAITOK);
sprintf(statepath, "%s/state", ivars->xd_otherend_path);
free(ivars->xd_otherend_watch.node, M_XENBUS);
ivars->xd_otherend_watch.node = statepath;
xs_register_watch(&ivars->xd_otherend_watch);
#if 0
/*
* Can't do this yet since we are running in
* the xenwatch thread and if we sleep here,
* we will stop delivering watch notifications
* and the device will never come back online.
*/
sx_xlock(&ivars->xd_lock);
while (ivars->xd_state != XenbusStateClosed
&& ivars->xd_state != XenbusStateConnected)
sx_sleep(&ivars->xd_state, &ivars->xd_lock,
0, "xdresume", 0);
sx_xunlock(&ivars->xd_lock);
#endif
}
free(kids, M_TEMP);
}
return (0);
}
int
xenbusb_print_child(device_t dev, device_t child)
{
struct xenbus_device_ivars *ivars = device_get_ivars(child);
int retval = 0;
retval += bus_print_child_header(dev, child);
retval += printf(" at %s", ivars->xd_node);
retval += bus_print_child_footer(dev, child);
return (retval);
}
int
xenbusb_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
struct xenbus_device_ivars *ivars = device_get_ivars(child);
switch (index) {
case XENBUS_IVAR_NODE:
*result = (uintptr_t) ivars->xd_node;
return (0);
case XENBUS_IVAR_TYPE:
*result = (uintptr_t) ivars->xd_type;
return (0);
case XENBUS_IVAR_STATE:
*result = (uintptr_t) ivars->xd_state;
return (0);
case XENBUS_IVAR_OTHEREND_ID:
*result = (uintptr_t) ivars->xd_otherend_id;
return (0);
case XENBUS_IVAR_OTHEREND_PATH:
*result = (uintptr_t) ivars->xd_otherend_path;
return (0);
}
return (ENOENT);
}
int
xenbusb_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
{
struct xenbus_device_ivars *ivars = device_get_ivars(child);
enum xenbus_state newstate;
int currstate;
switch (index) {
case XENBUS_IVAR_STATE:
{
int error;
newstate = (enum xenbus_state) value;
sx_xlock(&ivars->xd_lock);
if (ivars->xd_state == newstate) {
error = 0;
goto out;
}
error = xs_scanf(XST_NIL, ivars->xd_node, "state",
NULL, "%d", &currstate);
if (error)
goto out;
do {
error = xs_printf(XST_NIL, ivars->xd_node, "state",
"%d", newstate);
} while (error == EAGAIN);
if (error) {
/*
* Avoid looping through xenbus_dev_fatal()
* which calls xenbus_write_ivar to set the
* state to closing.
*/
if (newstate != XenbusStateClosing)
xenbus_dev_fatal(dev, error,
"writing new state");
goto out;
}
ivars->xd_state = newstate;
if ((ivars->xd_flags & XDF_CONNECTING) != 0
&& (newstate == XenbusStateClosed
|| newstate == XenbusStateConnected)) {
struct xenbusb_softc *xbs;
ivars->xd_flags &= ~XDF_CONNECTING;
xbs = device_get_softc(dev);
xenbusb_release_confighook(xbs);
}
wakeup(&ivars->xd_state);
out:
sx_xunlock(&ivars->xd_lock);
return (error);
}
case XENBUS_IVAR_NODE:
case XENBUS_IVAR_TYPE:
case XENBUS_IVAR_OTHEREND_ID:
case XENBUS_IVAR_OTHEREND_PATH:
/*
* These variables are read-only.
*/
return (EINVAL);
}
return (ENOENT);
}

sys/xen/xenbus/xenbusb.h Normal file

@@ -0,0 +1,272 @@
/*-
* Core definitions and data structures shareable across OS platforms.
*
* Copyright (c) 2010 Spectra Logic Corporation
* Copyright (C) 2008 Doug Rabson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions, and the following disclaimer,
* without modification.
* 2. Redistributions in binary form must reproduce at minimum a disclaimer
* substantially similar to the "NO WARRANTY" disclaimer below
* ("Disclaimer") and any redistribution must be conditioned upon
* including a substantially similar Disclaimer requirement for further
* binary redistribution.
*
* NO WARRANTY
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGES.
*
* $FreeBSD$
*/
#ifndef _XEN_XENBUS_XENBUSB_H
#define _XEN_XENBUS_XENBUSB_H
/**
* \file xenbusb.h
*
* Datastructures and function declarations for use in implementing
 * bus attachments (e.g. frontend and backend device busses) for XenBus.
*/
#include "xenbusb_if.h"
/**
* Enumeration of state flag values for the xbs_flags field of
* the xenbusb_softc structure.
*/
typedef enum {
/** The xbs_attach_ch interrupt config hook is still established. */
XBS_ATTACH_CH_ACTIVE = 0x01
} xenbusb_softc_flag;
/**
* \brief Container for all state needed to manage a Xenbus Bus
* attachment.
*/
struct xenbusb_softc {
/**
* XenStore watch used to monitor the subtree of the
* XenStore where devices for this bus attachment arrive
* and depart.
*
* \note This field must be the first in the softc structure
* so that a simple cast can be used to retrieve the
* softc from within a XenStore watch event callback.
*/
struct xs_watch xbs_device_watch;
/** Mutex used to protect fields of the xenbusb_softc. */
struct mtx xbs_lock;
/** State flags. */
xenbusb_softc_flag xbs_flags;
/**
* A dedicated task for processing child arrival and
* departure events.
*/
struct task xbs_probe_children;
/**
* Config Hook used to block boot processing until
* XenBus devices complete their connection processing
* with other VMs.
*/
struct intr_config_hook xbs_attach_ch;
/**
* The number of children for this bus that are still
* in the connecting (to other VMs) state. This variable
* is used to determine when to release xbs_attach_ch.
*/
u_int xbs_connecting_children;
/** The NewBus device_t for this bus attachment. */
device_t xbs_dev;
/**
* The VM relative path to the XenStore subtree this
* bus attachment manages.
*/
const char *xbs_node;
/**
* The number of path components (strings separated by the '/'
* character) that make up the device ID on this bus.
*/
u_int xbs_id_components;
};
/**
 * Enumeration of state flag values for the xd_flags field of
 * the xenbus_device_ivars structure.
*/
typedef enum {
/**
* This device is contributing to the xbs_connecting_children
* count of its parent bus.
*/
XDF_CONNECTING = 0x01
} xenbus_dev_flag;
/** Instance variables for devices on a XenBus bus. */
struct xenbus_device_ivars {
/**
 * XenStore watch used to monitor the subtree of the
 * XenStore where information about the otherend of the
 * split Xen device this device instance represents is
 * published.
*
* \note This field must be the first in the instance
* variable structure so that a simple cast can be
* used to retrieve ivar data from within a XenStore
* watch event callback.
*/
struct xs_watch xd_otherend_watch;
/** Sleepable lock used to protect instance data. */
struct sx xd_lock;
/** State flags. */
xenbus_dev_flag xd_flags;
/** The NewBus device_t for this XenBus device instance. */
device_t xd_dev;
/**
* The VM relative path to the XenStore subtree representing
 * this VM's half of this device.
*/
char *xd_node;
/** XenBus device type ("vbd", "vif", etc.). */
char *xd_type;
/**
* Cached version of <xd_node>/state node in the XenStore.
*/
enum xenbus_state xd_state;
/** The VM identifier of the other end of this split device. */
int xd_otherend_id;
/**
* The path to the subtree of the XenStore where information
 * about the otherend of this split device instance is published.
*/
char *xd_otherend_path;
};
/**
* \brief Identify instances of this device type in the system.
*
* \param driver The driver performing this identify action.
* \param parent The NewBus parent device for any devices this method adds.
*/
void xenbusb_identify(driver_t *driver __unused, device_t parent);
/**
* \brief Perform common XenBus bus attach processing.
*
* \param dev The NewBus device representing this XenBus bus.
* \param bus_node The XenStore path to the XenStore subtree for
* this XenBus bus.
* \param id_components The number of '/' separated path components that
* make up a unique device ID on this XenBus bus.
*
* \return On success, 0. Otherwise an errno value indicating the
* type of failure.
*
 * Initializes the softc for this bus, installs an interrupt-driven
* configuration hook to block boot processing until XenBus devices fully
* configure, performs an initial probe/attach of the bus, and registers
* a XenStore watch so we are notified when the bus topology changes.
*/
int xenbusb_attach(device_t dev, char *bus_node, u_int id_components);
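/*
 * Hedged sketch: what a bus front end's attach routine built on
 * xenbusb_attach() might look like.  The "device" subtree name and
 * the single-component device IDs used for front ends are assumptions
 * for illustration, not the attach code shipped in this change.
 */
#if 0
static int
xenbusb_front_attach(device_t dev)
{
	return (xenbusb_attach(dev, "device", 1));
}
#endif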
/**
* \brief Perform common XenBus bus resume handling.
*
* \param dev The NewBus device representing this XenBus bus.
*
* \return On success, 0. Otherwise an errno value indicating the
* type of failure.
*/
int xenbusb_resume(device_t dev);
/**
* \brief Pretty-prints information about a child of a XenBus bus.
*
* \param dev The NewBus device representing this XenBus bus.
 * \param child The NewBus device representing a child of dev's XenBus bus.
*
* \return On success, 0. Otherwise an errno value indicating the
* type of failure.
*/
int xenbusb_print_child(device_t dev, device_t child);
/**
* \brief Common XenBus child instance variable read access method.
*
* \param dev The NewBus device representing this XenBus bus.
 * \param child The NewBus device representing a child of dev's XenBus bus.
* \param index The index of the instance variable to access.
* \param result The value of the instance variable accessed.
*
* \return On success, 0. Otherwise an errno value indicating the
* type of failure.
*/
int xenbusb_read_ivar(device_t dev, device_t child, int index,
uintptr_t *result);
/**
* \brief Common XenBus child instance variable write access method.
*
* \param dev The NewBus device representing this XenBus bus.
 * \param child The NewBus device representing a child of dev's XenBus bus.
* \param index The index of the instance variable to access.
* \param value The new value to set in the instance variable accessed.
*
* \return On success, 0. Otherwise an errno value indicating the
* type of failure.
*/
int xenbusb_write_ivar(device_t dev, device_t child, int index,
uintptr_t value);
/**
* \brief Attempt to add a XenBus device instance to this XenBus bus.
*
* \param dev The NewBus device representing this XenBus bus.
* \param type The device type being added (e.g. "vbd", "vif").
* \param id The device ID for this device.
*
* \return On success, 0. Otherwise an errno value indicating the
* type of failure. Failure indicates that either the
* path to this device no longer exists or insufficient
* information exists in the XenStore to create a new
* device.
*
* If successful, this routine will add a device_t with instance
* variable storage to the NewBus device topology. Probe/Attach
* processing is not performed by this routine, but must be scheduled
* via the xbs_probe_children task. This separation of responsibilities
* is required to avoid hanging up the XenStore event delivery thread
* with our probe/attach work in the event a device is added via
* a callback from the XenStore.
*/
int xenbusb_add_device(device_t dev, const char *type, const char *id);
#endif /* _XEN_XENBUS_XENBUSB_H */
