Merge in support for Xen HVM on amd64 architecture.

This commit is contained in:
Doug Rabson 2009-03-11 15:30:12 +00:00
parent 802e54dc1f
commit 1267802438
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=189699
34 changed files with 3559 additions and 529 deletions

View File

@ -1494,6 +1494,14 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
if (env != NULL)
strlcpy(kernelname, env, sizeof(kernelname));
#ifdef XENHVM
if (inw(0x10) == 0x49d2) {
if (bootverbose)
printf("Xen detected: disabling emulated block and network devices\n");
outw(0x10, 3);
}
#endif
/* Location of kernel stack for locore */
return ((u_int64_t)thread0.td_pcb);
}

160
sys/amd64/conf/XENHVM Normal file
View File

@ -0,0 +1,160 @@
#
# XENHVM -- Xen HVM kernel configuration file for FreeBSD/amd64
#
# For more information on this file, please read the config(5) manual page,
# and/or the handbook section on Kernel Configuration Files:
#
# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html
#
# The handbook is also available locally in /usr/share/doc/handbook
# if you've installed the doc distribution, otherwise always see the
# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the
# latest information.
#
# An exhaustive list of options and more detailed explanations of the
# device lines is also present in the ../../conf/NOTES and NOTES files.
# If you are in doubt as to the purpose or necessity of a line, check first
# in NOTES.
#
# $FreeBSD$
cpu HAMMER
ident		XENHVM
# To statically compile in device wiring instead of /boot/device.hints
#hints "GENERIC.hints" # Default places to look for devices.
# Use the following to compile in values accessible to the kernel
# through getenv() (or kenv(1) in userland). The format of the file
# is 'variable=value', see kenv(1)
#
# env "GENERIC.env"
makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols
makeoptions MODULES_OVERRIDE=""
options SCHED_ULE # ULE scheduler
options PREEMPTION # Enable kernel thread preemption
options INET # InterNETworking
options INET6 # IPv6 communications protocols
options SCTP # Stream Control Transmission Protocol
options FFS # Berkeley Fast Filesystem
options SOFTUPDATES # Enable FFS soft updates support
options UFS_ACL # Support for access control lists
options UFS_DIRHASH # Improve performance on big directories
options UFS_GJOURNAL # Enable gjournal-based UFS journaling
options MD_ROOT # MD is a potential root device
options NFSCLIENT # Network Filesystem Client
options NFSSERVER # Network Filesystem Server
options NFSLOCKD # Network Lock Manager
options NFS_ROOT # NFS usable as /, requires NFSCLIENT
options MSDOSFS # MSDOS Filesystem
options CD9660 # ISO 9660 Filesystem
options PROCFS # Process filesystem (requires PSEUDOFS)
options PSEUDOFS # Pseudo-filesystem framework
options GEOM_PART_GPT # GUID Partition Tables.
options GEOM_LABEL # Provides labelization
options COMPAT_43TTY # BSD 4.3 TTY compat (sgtty)
options COMPAT_IA32 # Compatible with i386 binaries
options COMPAT_FREEBSD4 # Compatible with FreeBSD4
options COMPAT_FREEBSD5 # Compatible with FreeBSD5
options COMPAT_FREEBSD6 # Compatible with FreeBSD6
options COMPAT_FREEBSD7 # Compatible with FreeBSD7
options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI
options KTRACE # ktrace(1) support
options STACK # stack(9) support
options SYSVSHM # SYSV-style shared memory
options SYSVMSG # SYSV-style message queues
options SYSVSEM # SYSV-style semaphores
options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
options KBD_INSTALL_CDEV # install a CDEV entry in /dev
options STOP_NMI # Stop CPUS using NMI instead of IPI
options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4)
options AUDIT # Security event auditing
#options KDTRACE_FRAME # Ensure frames are compiled in
#options KDTRACE_HOOKS # Kernel DTrace hooks
# Debugging for use in -current
options KDB # Enable kernel debugger support.
options DDB # Support DDB.
options GDB # Support remote GDB.
options INVARIANTS # Enable calls of extra sanity checking
options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS
options WITNESS # Enable checks to detect deadlocks and cycles
options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed
# Make an SMP-capable kernel by default
options SMP # Symmetric MultiProcessor Kernel
# CPU frequency control
device cpufreq
# Bus support.
device acpi
device pci
# Floppy drives
device fdc
# Xen HVM support
options XENHVM
device xenpci
# ATA and ATAPI devices
device ata
device atadisk # ATA disk drives
device ataraid # ATA RAID drives
device atapicd # ATAPI CDROM drives
device atapifd # ATAPI floppy drives
device atapist # ATAPI tape drives
options ATA_STATIC_ID # Static device numbering
# SCSI peripherals
device scbus # SCSI bus (required for SCSI)
device ch # SCSI media changers
device da # Direct Access (disks)
device sa # Sequential Access (tape etc)
device cd # CD
device pass # Passthrough device (direct SCSI access)
device ses # SCSI Environmental Services (and SAF-TE)
# atkbdc0 controls both the keyboard and the PS/2 mouse
device atkbdc # AT keyboard controller
device atkbd # AT keyboard
device psm # PS/2 mouse
device kbdmux # keyboard multiplexer
device vga # VGA video card driver
device splash # Splash screen and screen saver support
# syscons is the default console driver, resembling an SCO console
device sc
device agp # support several AGP chipsets
# Serial (COM) ports
device uart # Generic UART driver
# PCI Ethernet NICs that use the common MII bus controller code.
# NOTE: Be sure to keep the 'device miibus' line in order to use these NICs!
device miibus # MII bus support
device re # RealTek 8139C+/8169/8169S/8110S
# Pseudo devices.
device loop # Network loopback
device random # Entropy device
device ether # Ethernet support
device tun # Packet tunnel.
device pty # BSD-style compatibility pseudo ttys
device md # Memory "disks"
device gif # IPv6 and IPv4 tunneling
device faith # IPv6-to-IPv4 relaying (translation)
device firmware # firmware assist module
# The `bpf' device enables the Berkeley Packet Filter.
# Be aware of the administrative consequences of enabling this!
# Note that 'bpf' is required for DHCP.
device bpf # Berkeley packet filter

View File

@ -33,6 +33,24 @@
#error "sys/cdefs.h is a prerequisite for this file"
#endif
#if defined(XEN) || defined(XENHVM)
/*
 * Defaults for the number of Xen virtual IRQs and virtual IPIs tracked
 * per CPU; earlier headers may already have defined larger values.
 */
#ifndef NR_VIRQS
#define NR_VIRQS 24
#endif
#ifndef NR_IPIS
#define NR_IPIS 2
#endif
#endif
/*
 * Extra per-CPU fields used by the Xen HVM event-channel code to
 * remember the last processed level-1/level-2 pending-bit indices.
 * The leading ';' terminates the previous field at the expansion site
 * (appended after pc_gs32p in PCPU_MD_FIELDS).
 */
#ifdef XENHVM
#define PCPU_XEN_FIELDS \
; \
unsigned int pc_last_processed_l1i; \
unsigned int pc_last_processed_l2i
#else
#define PCPU_XEN_FIELDS
#endif
/*
* The SMP parts are setup in pmap.c and locore.s for the BSP, and
* mp_machdep.c sets up the data for the AP's to "see" when they awake.
@ -49,7 +67,8 @@
register_t pc_scratch_rsp; /* User %rsp in syscall */ \
u_int pc_apic_id; \
u_int pc_acpi_id; /* ACPI CPU id */ \
struct user_segment_descriptor *pc_gs32p
struct user_segment_descriptor *pc_gs32p \
PCPU_XEN_FIELDS
#ifdef _KERNEL

View File

@ -0,0 +1,415 @@
/******************************************************************************
* hypercall.h
*
* Linux-specific hypervisor handling.
*
* Copyright (c) 2002-2004, K A Fraser
*
* 64-bit updates:
* Benjamin Liu <benjamin.liu@intel.com>
* Jun Nakajima <jun.nakajima@intel.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef __MACHINE_XEN_HYPERCALL_H__
#define __MACHINE_XEN_HYPERCALL_H__
#include <sys/systm.h>
#ifndef __XEN_HYPERVISOR_H__
# error "please don't include this file directly"
#endif
/* Two-level stringification so macro arguments are expanded before '#'. */
#define __STR(x) #x
#define STR(x) __STR(x)
/* Errno returned by old hypervisors for hypercalls they do not know. */
#define ENOXENSYS 38
/* Oldest hypervisor interface version (3.0.2) we carry fallbacks for. */
#define CONFIG_XEN_COMPAT 0x030002
/* Local annotation stub; #undef'd at the bottom of this header. */
#define __must_check
#ifdef XEN
/*
 * PV kernels call directly into the fixed hypercall transfer page;
 * each hypercall stub occupies 32 bytes.
 */
#define HYPERCALL_STR(name) \
"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"
#else
/*
 * HVM kernels discover the stub page at runtime, so go indirect
 * through the hypercall_stubs pointer instead.
 */
#define HYPERCALL_STR(name) \
"mov $("STR(__HYPERVISOR_##name)" * 32),%%eax; "\
"add hypercall_stubs(%%rip),%%rax; " \
"call *%%rax"
#endif
/*
 * _hypercallN(type, name, ...): invoke hypercall 'name' with N
 * arguments and return a 'type' result.  Arguments travel in
 * %rdi, %rsi, %rdx, %r10, %r8 (note %r10 where the SysV C ABI would
 * use %rcx) and the result comes back in %rax.  The "memory" clobber
 * stops the compiler from caching hypervisor-visible state across the
 * call; the dummy "__ignN" outputs tell it the argument registers are
 * clobbered.
 */
#define _hypercall0(type, name) \
({ \
type __res; \
__asm__ volatile ( \
HYPERCALL_STR(name) \
: "=a" (__res) \
: \
: "memory" ); \
__res; \
})
#define _hypercall1(type, name, a1) \
({ \
type __res; \
long __ign1; \
__asm__ volatile ( \
HYPERCALL_STR(name) \
: "=a" (__res), "=D" (__ign1) \
: "1" ((long)(a1)) \
: "memory" ); \
__res; \
})
#define _hypercall2(type, name, a1, a2) \
({ \
type __res; \
long __ign1, __ign2; \
__asm__ volatile ( \
HYPERCALL_STR(name) \
: "=a" (__res), "=D" (__ign1), "=S" (__ign2) \
: "1" ((long)(a1)), "2" ((long)(a2)) \
: "memory" ); \
__res; \
})
#define _hypercall3(type, name, a1, a2, a3) \
({ \
type __res; \
long __ign1, __ign2, __ign3; \
__asm__ volatile ( \
HYPERCALL_STR(name) \
: "=a" (__res), "=D" (__ign1), "=S" (__ign2), \
"=d" (__ign3) \
: "1" ((long)(a1)), "2" ((long)(a2)), \
"3" ((long)(a3)) \
: "memory" ); \
__res; \
})
#define _hypercall4(type, name, a1, a2, a3, a4) \
({ \
type __res; \
long __ign1, __ign2, __ign3; \
register long __arg4 __asm__("r10") = (long)(a4); \
__asm__ volatile ( \
HYPERCALL_STR(name) \
: "=a" (__res), "=D" (__ign1), "=S" (__ign2), \
"=d" (__ign3), "+r" (__arg4) \
: "1" ((long)(a1)), "2" ((long)(a2)), \
"3" ((long)(a3)) \
: "memory" ); \
__res; \
})
#define _hypercall5(type, name, a1, a2, a3, a4, a5) \
({ \
type __res; \
long __ign1, __ign2, __ign3; \
register long __arg4 __asm__("r10") = (long)(a4); \
register long __arg5 __asm__("r8") = (long)(a5); \
__asm__ volatile ( \
HYPERCALL_STR(name) \
: "=a" (__res), "=D" (__ign1), "=S" (__ign2), \
"=d" (__ign3), "+r" (__arg4), "+r" (__arg5) \
: "1" ((long)(a1)), "2" ((long)(a2)), \
"3" ((long)(a3)) \
: "memory" ); \
__res; \
})
/* Install a new trap/exception table for the calling guest. */
static inline int __must_check
HYPERVISOR_set_trap_table(
const trap_info_t *table)
{
return _hypercall1(int, set_trap_table, table);
}
/* Submit a batch of page-table updates; *success_count reports how many applied. */
static inline int __must_check
HYPERVISOR_mmu_update(
mmu_update_t *req, unsigned int count, unsigned int *success_count,
domid_t domid)
{
return _hypercall4(int, mmu_update, req, count, success_count, domid);
}
/* Submit a batch of extended MMU operations (pin/unpin, TLB flush, ...). */
static inline int __must_check
HYPERVISOR_mmuext_op(
struct mmuext_op *op, unsigned int count, unsigned int *success_count,
domid_t domid)
{
return _hypercall4(int, mmuext_op, op, count, success_count, domid);
}
/* Register the list of frames backing the guest's GDT. */
static inline int __must_check
HYPERVISOR_set_gdt(
unsigned long *frame_list, unsigned int entries)
{
return _hypercall2(int, set_gdt, frame_list, entries);
}
/* Set the kernel stack <ss:esp> used on entry to the guest kernel. */
static inline int __must_check
HYPERVISOR_stack_switch(
unsigned long ss, unsigned long esp)
{
return _hypercall2(int, stack_switch, ss, esp);
}
/* Register event, failsafe and syscall callback entry points. */
static inline int __must_check
HYPERVISOR_set_callbacks(
unsigned long event_address, unsigned long failsafe_address,
unsigned long syscall_address)
{
return _hypercall3(int, set_callbacks,
event_address, failsafe_address, syscall_address);
}
/* Set/clear lazy-FPU TS handling for this VCPU. */
static inline int
HYPERVISOR_fpu_taskswitch(
int set)
{
return _hypercall1(int, fpu_taskswitch, set);
}
/* Pre-3.0.2 scheduler interface: cmd plus a single long argument. */
static inline int __must_check
HYPERVISOR_sched_op_compat(
int cmd, unsigned long arg)
{
return _hypercall2(int, sched_op_compat, cmd, arg);
}
/* Scheduler operation (yield, block, shutdown, ...); arg points at a cmd-specific struct. */
static inline int __must_check
HYPERVISOR_sched_op(
int cmd, void *arg)
{
return _hypercall2(int, sched_op, cmd, arg);
}
/* Program the per-VCPU one-shot timer. */
static inline long __must_check
HYPERVISOR_set_timer_op(
uint64_t timeout)
{
return _hypercall1(long, set_timer_op, timeout);
}
/* Privileged platform operation; stamps the expected interface version first. */
static inline int __must_check
HYPERVISOR_platform_op(
struct xen_platform_op *platform_op)
{
platform_op->interface_version = XENPF_INTERFACE_VERSION;
return _hypercall1(int, platform_op, platform_op);
}
/* Write a hardware debug register for this VCPU. */
static inline int __must_check
HYPERVISOR_set_debugreg(
unsigned int reg, unsigned long value)
{
return _hypercall2(int, set_debugreg, reg, value);
}
/* Read a hardware debug register for this VCPU. */
static inline unsigned long __must_check
HYPERVISOR_get_debugreg(
unsigned int reg)
{
return _hypercall1(unsigned long, get_debugreg, reg);
}
/* Update the descriptor-table entry at machine address 'ma'. */
static inline int __must_check
HYPERVISOR_update_descriptor(
unsigned long ma, unsigned long word)
{
return _hypercall2(int, update_descriptor, ma, word);
}
/* Memory reservation operations (increase/decrease reservation, exchange, ...). */
static inline int __must_check
HYPERVISOR_memory_op(
unsigned int cmd, void *arg)
{
return _hypercall2(int, memory_op, cmd, arg);
}
/* Execute a batch of hypercalls in a single trap. */
static inline int __must_check
HYPERVISOR_multicall(
multicall_entry_t *call_list, unsigned int nr_calls)
{
return _hypercall2(int, multicall, call_list, nr_calls);
}
/* Update the PTE mapping 'va'; 'flags' selects the TLB-flush behavior. */
static inline int __must_check
HYPERVISOR_update_va_mapping(
unsigned long va, uint64_t new_val, unsigned long flags)
{
return _hypercall3(int, update_va_mapping, va, new_val, flags);
}
/*
 * Event-channel operation.  Pre-3.0.2 hypervisors lack the multiplexed
 * call (they return -ENOXENSYS), so fall back to the old
 * event_channel_op_compat interface, bouncing the argument through a
 * struct evtchn_op on the stack in both directions.
 */
static inline int __must_check
HYPERVISOR_event_channel_op(
int cmd, void *arg)
{
int rc = _hypercall2(int, event_channel_op, cmd, arg);
#if CONFIG_XEN_COMPAT <= 0x030002
if (unlikely(rc == -ENOXENSYS)) {
struct evtchn_op op;
op.cmd = cmd;
memcpy(&op.u, arg, sizeof(op.u));
rc = _hypercall1(int, event_channel_op_compat, &op);
memcpy(arg, &op.u, sizeof(op.u));
}
#endif
return rc;
}
/* Query hypervisor version/feature information. */
static inline int __must_check
HYPERVISOR_xen_version(
int cmd, void *arg)
{
return _hypercall2(int, xen_version, cmd, arg);
}
/* Read from / write to the emergency hypervisor console. */
static inline int __must_check
HYPERVISOR_console_io(
int cmd, unsigned int count, char *str)
{
return _hypercall3(int, console_io, cmd, count, str);
}
/*
 * Physical-device access operation, with the same pre-3.0.2 compat
 * fallback scheme as HYPERVISOR_event_channel_op() above.
 */
static inline int __must_check
HYPERVISOR_physdev_op(
int cmd, void *arg)
{
int rc = _hypercall2(int, physdev_op, cmd, arg);
#if CONFIG_XEN_COMPAT <= 0x030002
if (unlikely(rc == -ENOXENSYS)) {
struct physdev_op op;
op.cmd = cmd;
memcpy(&op.u, arg, sizeof(op.u));
rc = _hypercall1(int, physdev_op_compat, &op);
memcpy(arg, &op.u, sizeof(op.u));
}
#endif
return rc;
}
/* Grant-table operation on a batch of 'count' entries at 'uop'. */
static inline int __must_check
HYPERVISOR_grant_table_op(
unsigned int cmd, void *uop, unsigned int count)
{
return _hypercall3(int, grant_table_op, cmd, uop, count);
}
/* Like HYPERVISOR_update_va_mapping(), but acting on a foreign domain. */
static inline int __must_check
HYPERVISOR_update_va_mapping_otherdomain(
unsigned long va, uint64_t new_val, unsigned long flags, domid_t domid)
{
return _hypercall4(int, update_va_mapping_otherdomain, va,
new_val, flags, domid);
}
/* Toggle a VM assist mode for this domain. */
static inline int __must_check
HYPERVISOR_vm_assist(
unsigned int cmd, unsigned int type)
{
return _hypercall2(int, vm_assist, cmd, type);
}
/* Per-VCPU operation (bring up/down, register areas, ...). */
static inline int __must_check
HYPERVISOR_vcpu_op(
int cmd, unsigned int vcpuid, void *extra_args)
{
return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
}
/* Set a segment base register (amd64-specific hypercall). */
static inline int __must_check
HYPERVISOR_set_segment_base(
int reg, unsigned long value)
{
return _hypercall2(int, set_segment_base, reg, value);
}
/*
 * Request suspend of this domain.  'srec' rides as an extra third
 * argument to SCHEDOP_shutdown (beyond sched_op's normal two); on
 * pre-3.0.2 hypervisors fall back to the compat scheduler call.
 */
static inline int __must_check
HYPERVISOR_suspend(
unsigned long srec)
{
struct sched_shutdown sched_shutdown = {
.reason = SHUTDOWN_suspend
};
int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown,
&sched_shutdown, srec);
#if CONFIG_XEN_COMPAT <= 0x030002
if (rc == -ENOXENSYS)
rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown,
SHUTDOWN_suspend, srec);
#endif
return rc;
}
#if CONFIG_XEN_COMPAT <= 0x030002
/* NMI-handling operation; only exists on old interface versions. */
static inline int
HYPERVISOR_nmi_op(
unsigned long op, void *arg)
{
return _hypercall2(int, nmi_op, op, arg);
}
#endif
#ifndef CONFIG_XEN
/* HVM parameter operation; only meaningful for HVM guests. */
static inline unsigned long __must_check
HYPERVISOR_hvm_op(
int op, void *arg)
{
return _hypercall2(unsigned long, hvm_op, op, arg);
}
#endif
/* Register/unregister upcall vectors via the callback interface. */
static inline int __must_check
HYPERVISOR_callback_op(
int cmd, const void *arg)
{
return _hypercall2(int, callback_op, cmd, arg);
}
/* Xen profiling (xenoprof) control operation. */
static inline int __must_check
HYPERVISOR_xenoprof_op(
int op, void *arg)
{
return _hypercall2(int, xenoprof_op, op, arg);
}
/* kexec image load/exec operation. */
static inline int __must_check
HYPERVISOR_kexec_op(
unsigned long op, void *args)
{
return _hypercall2(int, kexec_op, op, args);
}
/* __must_check was only a local stub; do not leak it to includers. */
#undef __must_check
#endif /* __MACHINE_XEN_HYPERCALL_H__ */

View File

@ -0,0 +1,129 @@
#ifndef __XEN_SYNCH_BITOPS_H__
#define __XEN_SYNCH_BITOPS_H__
/*
* Copyright 1992, Linus Torvalds.
* Heavily modified to provide guaranteed strong synchronisation
* when communicating with Xen or other guest OSes running on other CPUs.
*/
/*
 * ADDR adapts the void* bit-string argument for use as an asm memory
 * operand.  NOTE(review): these use "=m" outputs for read-modify-write
 * operands; the "memory" clobber papers over that, but "+m" would be
 * the precise constraint -- confirm before changing, as this is the
 * long-standing Linux-derived form.
 */
#define ADDR (*(volatile long *) addr)
/* Atomically (locked) set bit 'nr' at 'addr'; visible to other CPUs and Xen. */
static __inline__ void synch_set_bit(int nr, volatile void * addr)
{
__asm__ __volatile__ (
"lock btsl %1,%0"
: "=m" (ADDR) : "Ir" (nr) : "memory" );
}
/* Atomically clear bit 'nr' at 'addr'. */
static __inline__ void synch_clear_bit(int nr, volatile void * addr)
{
__asm__ __volatile__ (
"lock btrl %1,%0"
: "=m" (ADDR) : "Ir" (nr) : "memory" );
}
/* Atomically flip bit 'nr' at 'addr'. */
static __inline__ void synch_change_bit(int nr, volatile void * addr)
{
__asm__ __volatile__ (
"lock btcl %1,%0"
: "=m" (ADDR) : "Ir" (nr) : "memory" );
}
/* Atomically set bit 'nr'; returns nonzero iff it was already set (sbb -> 0/-1). */
static __inline__ int synch_test_and_set_bit(int nr, volatile void * addr)
{
int oldbit;
__asm__ __volatile__ (
"lock btsl %2,%1\n\tsbbl %0,%0"
: "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
return oldbit;
}
/* Atomically clear bit 'nr'; returns nonzero iff it was previously set. */
static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr)
{
int oldbit;
__asm__ __volatile__ (
"lock btrl %2,%1\n\tsbbl %0,%0"
: "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
return oldbit;
}
/* Atomically flip bit 'nr'; returns its previous value (nonzero if set). */
static __inline__ int synch_test_and_change_bit(int nr, volatile void * addr)
{
int oldbit;
__asm__ __volatile__ (
"lock btcl %2,%1\n\tsbbl %0,%0"
: "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
return oldbit;
}
struct __synch_xchg_dummy { unsigned long a[100]; };
#define __synch_xg(x) ((volatile struct __synch_xchg_dummy *)(x))
#define synch_cmpxchg(ptr, old, new) \
((__typeof__(*(ptr)))__synch_cmpxchg((ptr),\
(unsigned long)(old), \
(unsigned long)(new), \
sizeof(*(ptr))))
static inline unsigned long __synch_cmpxchg(volatile void *ptr,
unsigned long old,
unsigned long new, int size)
{
unsigned long prev;
switch (size) {
case 1:
__asm__ __volatile__("lock; cmpxchgb %b1,%2"
: "=a"(prev)
: "q"(new), "m"(*__synch_xg(ptr)),
"0"(old)
: "memory");
return prev;
case 2:
__asm__ __volatile__("lock; cmpxchgw %w1,%2"
: "=a"(prev)
: "q"(new), "m"(*__synch_xg(ptr)),
"0"(old)
: "memory");
return prev;
case 4:
__asm__ __volatile__("lock; cmpxchgl %k1,%2"
: "=a"(prev)
: "q"(new), "m"(*__synch_xg(ptr)),
"0"(old)
: "memory");
return prev;
case 8:
__asm__ __volatile__("lock; cmpxchgq %1,%2"
: "=a"(prev)
: "q"(new), "m"(*__synch_xg(ptr)),
"0"(old)
: "memory");
return prev;
}
return old;
}
/*
 * Non-atomic test of bit 'nr' in the bit string at 'addr'; the
 * compile-time-constant variant selected by synch_test_bit() below.
 * Returns nonzero iff the bit is set.
 */
static __inline__ int synch_const_test_bit(int nr, const volatile void * addr)
{
	const volatile unsigned int *words;
	unsigned int mask;

	words = (const volatile unsigned int *)addr;
	mask = 1U << (nr & 31);
	return ((words[nr >> 5] & mask) != 0);
}
/* Non-atomic bit test for a run-time 'nr'; a plain read needs no lock prefix. */
static __inline__ int synch_var_test_bit(int nr, volatile void * addr)
{
int oldbit;
__asm__ __volatile__ (
"btl %2,%1\n\tsbbl %0,%0"
: "=r" (oldbit) : "m" (ADDR), "Ir" (nr) );
return oldbit;
}
/* Dispatch to the constant- or variable-index variant at compile time. */
#define synch_test_bit(nr,addr) \
(__builtin_constant_p(nr) ? \
synch_const_test_bit((nr),(addr)) : \
synch_var_test_bit((nr),(addr)))
#endif /* __XEN_SYNCH_BITOPS_H__ */

View File

@ -0,0 +1,296 @@
/******************************************************************************
* os.h
*
* random collection of macros and definition
*/
#ifndef _XEN_OS_H_
#define _XEN_OS_H_
#ifdef PAE
#define CONFIG_X86_PAE
#endif
#if !defined(__XEN_INTERFACE_VERSION__)
/*
* Can update to a more recent version when we implement
* the hypercall page
*/
#define __XEN_INTERFACE_VERSION__ 0x00030204
#endif
#include <xen/interface/xen.h>
/* Force a proper event-channel callback from Xen. */
void force_evtchn_callback(void);
/* Nonzero once the GDT is set up; presumably consulted during early boot -- confirm at definition site. */
extern int gdtset;
/* Kernel mapping of the shared-info page exported by the hypervisor. */
extern shared_info_t *HYPERVISOR_shared_info;
/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
static inline void rep_nop(void)
{
__asm__ __volatile__ ( "rep;nop" : : : "memory" );
}
#define cpu_relax() rep_nop()
/* crude memory allocator for memory allocation early in
 * boot
 */
void *bootmem_alloc(unsigned int size);
void bootmem_free(void *ptr, unsigned int size);
/* Everything below this point is not included by assembler (.S) files. */
#ifndef __ASSEMBLY__
void printk(const char *fmt, ...);
/* some function prototypes */
void trap_init(void);
/* Branch-prediction hints in the style of the Linux macros of the same name. */
#define likely(x) __builtin_expect((x),1)
#define unlikely(x) __builtin_expect((x),0)
#ifndef XENHVM
/*
* STI/CLI equivalents. These basically set and clear the virtual
* event_enable flag in teh shared_info structure. Note that when
* the enable bit is set, there may be pending events to be handled.
* We may therefore call into do_hypervisor_callback() directly.
*/
#define __cli() \
do { \
vcpu_info_t *_vcpu; \
_vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \
_vcpu->evtchn_upcall_mask = 1; \
barrier(); \
} while (0)
#define __sti() \
do { \
vcpu_info_t *_vcpu; \
barrier(); \
_vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \
_vcpu->evtchn_upcall_mask = 0; \
barrier(); /* unmask then check (avoid races) */ \
if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
force_evtchn_callback(); \
} while (0)
#define __restore_flags(x) \
do { \
vcpu_info_t *_vcpu; \
barrier(); \
_vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \
if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \
barrier(); /* unmask then check (avoid races) */ \
if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
force_evtchn_callback(); \
} \
} while (0)
/*
* Add critical_{enter, exit}?
*
*/
#define __save_and_cli(x) \
do { \
vcpu_info_t *_vcpu; \
_vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \
(x) = _vcpu->evtchn_upcall_mask; \
_vcpu->evtchn_upcall_mask = 1; \
barrier(); \
} while (0)
#define cli() __cli()
#define sti() __sti()
#define save_flags(x) __save_flags(x)
#define restore_flags(x) __restore_flags(x)
#define save_and_cli(x) __save_and_cli(x)
#define local_irq_save(x) __save_and_cli(x)
#define local_irq_restore(x) __restore_flags(x)
#define local_irq_disable() __cli()
#define local_irq_enable() __sti()
#define mtx_lock_irqsave(lock, x) {local_irq_save((x)); mtx_lock_spin((lock));}
#define mtx_unlock_irqrestore(lock, x) {mtx_unlock_spin((lock)); local_irq_restore((x)); }
#define spin_lock_irqsave mtx_lock_irqsave
#define spin_unlock_irqrestore mtx_unlock_irqrestore
#else
#endif
/*
 * Memory barriers.  mb()/rmb() use the amd64 fence instructions; wmb()
 * only needs a compiler barrier since x86 does not reorder stores with
 * other stores.  The rmb() definition previously carried a stray
 * trailing semicolon, which made "rmb();" expand to two statements and
 * broke unbraced if/else bodies; it is dropped here.
 */
#ifndef mb
#define mb() __asm__ __volatile__("mfence":::"memory")
#endif
#ifndef rmb
#define rmb() __asm__ __volatile__("lfence":::"memory")
#endif
#ifndef wmb
#define wmb() barrier()
#endif
#ifdef SMP
/* SMP: map the smp_* barriers onto the real CPU barriers above. */
#define smp_mb() mb()
#define smp_rmb() rmb()
#define smp_wmb() wmb()
/* NOTE(review): read_barrier_depends() is not defined in this header; verify it exists before using smp_read_barrier_depends(). */
#define smp_read_barrier_depends() read_barrier_depends()
#define set_mb(var, value) do { xchg(&var, value); } while (0)
#else
/* UP: compiler barriers suffice; no CPU-level ordering is needed. */
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
#define smp_read_barrier_depends() do { } while(0)
#define set_mb(var, value) do { var = value; barrier(); } while (0)
#endif
/* This is a barrier for the compiler only, NOT the processor! */
#define barrier() __asm__ __volatile__("": : :"memory")
/* NOTE(review): LOCK/LOCK_PREFIX expand to nothing, so the bit ops below that use them are not SMP-atomic as written -- confirm this is intended. */
#define LOCK_PREFIX ""
#define LOCK ""
/* Adapts the void* bit-string argument for use as an asm memory operand. */
#define ADDR (*(volatile long *) addr)
/*
 * Make sure gcc doesn't try to be clever and move things around
 * on us. We need to use _exactly_ the address the user gave us,
 * not some alias that contains the same information.
 */
/* Counter type for atomic_inc() below. */
typedef struct { volatile int counter; } atomic_t;
/*
 * xen_xchg(ptr, v): atomically exchange 'v' with *ptr and evaluate to
 * the old value.  xchg on x86 carries an implicit lock prefix.
 */
#define xen_xchg(ptr,v) \
((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
/* Dummy type with a huge footprint so "m"(*__xg(ptr)) covers the whole object. */
struct __xchg_dummy { unsigned long a[100]; };
#define __xg(x) ((volatile struct __xchg_dummy *)(x))
/*
 * Size-dispatched exchange backend.  The original i386-derived version
 * only handled 1-, 2- and 4-byte objects, so on this amd64 header an
 * 8-byte xen_xchg() silently returned the new value without swapping;
 * add the xchgq case so pointer- and long-sized exchanges work.
 */
static __inline unsigned long __xchg(unsigned long x, volatile void * ptr,
				   int size)
{
	switch (size) {
	case 1:
		__asm__ __volatile__("xchgb %b0,%1"
				     :"=q" (x)
				     :"m" (*__xg(ptr)), "0" (x)
				     :"memory");
		break;
	case 2:
		__asm__ __volatile__("xchgw %w0,%1"
				     :"=r" (x)
				     :"m" (*__xg(ptr)), "0" (x)
				     :"memory");
		break;
	case 4:
		__asm__ __volatile__("xchgl %k0,%1"
				     :"=r" (x)
				     :"m" (*__xg(ptr)), "0" (x)
				     :"memory");
		break;
	case 8:
		__asm__ __volatile__("xchgq %0,%1"
				     :"=r" (x)
				     :"m" (*__xg(ptr)), "0" (x)
				     :"memory");
		break;
	}
	return x;
}
/**
 * test_and_clear_bit - Clear a bit and return its old value
 * @nr: Bit to clear
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 * NOTE(review): LOCK_PREFIX expands to "" above, so as written the
 * atomicity claim only holds against interrupts on one CPU, not across
 * CPUs -- confirm intent.
 */
static __inline int test_and_clear_bit(int nr, volatile void * addr)
{
int oldbit;
__asm__ __volatile__( LOCK_PREFIX
"btrl %2,%1\n\tsbbl %0,%0"
:"=r" (oldbit),"=m" (ADDR)
:"Ir" (nr) : "memory");
return oldbit;
}
/*
 * Non-atomic test of bit 'nr' in the bit string at 'addr', for the case
 * where 'nr' is a compile-time constant (see test_bit() below).
 * Returns nonzero iff the bit is set.
 */
static __inline int constant_test_bit(int nr, const volatile void * addr)
{
	const volatile unsigned int *p = (const volatile unsigned int *)addr;

	return (((p[nr >> 5] >> (nr & 31)) & 1U) != 0);
}
/* Non-atomic bit test for a run-time 'nr'. */
static __inline int variable_test_bit(int nr, volatile void * addr)
{
int oldbit;
__asm__ __volatile__(
"btl %2,%1\n\tsbbl %0,%0"
:"=r" (oldbit)
:"m" (ADDR),"Ir" (nr));
return oldbit;
}
/* Dispatch to the constant- or variable-index variant at compile time. */
#define test_bit(nr,addr) \
(__builtin_constant_p(nr) ? \
constant_test_bit((nr),(addr)) : \
variable_test_bit((nr),(addr)))
/**
 * set_bit - Atomically set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * This function is atomic and may not be reordered. See __set_bit()
 * if you do not require the atomic guarantees.
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 * NOTE(review): LOCK_PREFIX expands to "" above, so this is not
 * SMP-atomic as written -- confirm intent.
 */
static __inline__ void set_bit(int nr, volatile void * addr)
{
__asm__ __volatile__( LOCK_PREFIX
"btsl %1,%0"
:"=m" (ADDR)
:"Ir" (nr));
}
/**
 * clear_bit - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * clear_bit() is atomic and may not be reordered. However, it does
 * not contain a memory barrier, so if it is used for locking purposes,
 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
 * in order to ensure changes are visible on other processors.
 */
static __inline__ void clear_bit(int nr, volatile void * addr)
{
__asm__ __volatile__( LOCK_PREFIX
"btrl %1,%0"
:"=m" (ADDR)
:"Ir" (nr));
}
/**
 * atomic_inc - increment atomic variable
 * @v: pointer of type atomic_t
 *
 * Atomically increments @v by 1. Note that the guaranteed
 * useful range of an atomic_t is only 24 bits.
 * NOTE(review): LOCK expands to "" above, so this increment is not
 * SMP-atomic as written -- confirm intent.
 */
static __inline__ void atomic_inc(atomic_t *v)
{
__asm__ __volatile__(
LOCK "incl %0"
:"=m" (v->counter)
:"m" (v->counter));
}
/*
 * Read the CPU timestamp counter into 'val' (a 64-bit lvalue).
 * rdtsc returns its result split across %edx:%eax.  The previous
 * definition used the "=A" constraint, which on amd64 allocates either
 * %rax or %rdx (it only means the %edx:%eax pair on i386) and silently
 * discards the high 32 bits, so combine the halves explicitly.
 */
#define rdtscll(val) do { \
	unsigned int __rdtsc_lo, __rdtsc_hi; \
	__asm__ __volatile__("rdtsc" : "=a" (__rdtsc_lo), "=d" (__rdtsc_hi)); \
	(val) = ((unsigned long long)__rdtsc_hi << 32) | __rdtsc_lo; \
} while (0)
#endif /* !__ASSEMBLY__ */
#endif /* _XEN_OS_H_ */

View File

@ -0,0 +1,83 @@
/*
*
* Copyright (c) 2004,2005 Kip Macy
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _XEN_XENFUNC_H_
#define _XEN_XENFUNC_H_
#ifdef XENHVM
#include <machine/xen/xenvar.h>
#else
#include <machine/xen/xenpmap.h>
#include <machine/segments.h>
#endif
/* Drop into the debugger via a breakpoint trap. */
/* NOTE(review): the expansion carries a trailing ';', so "BKPT;" emits an extra empty statement and a bare BKPT inside an unbraced if/else misparses; existing callers may rely on the current form, so it is left as-is. */
#define BKPT __asm__("int3");
#define XPQ_CALL_DEPTH 5
#define XPQ_CALL_COUNT 2
#define PG_PRIV PG_AVAIL3
/*
 * Per-page-table debug bookkeeping: a reference count plus up to
 * XPQ_CALL_COUNT recorded call traces of XPQ_CALL_DEPTH frames each
 * (apparently the EIPs of recent updaters -- confirm at the use site).
 */
typedef struct {
unsigned long pt_ref;
unsigned long pt_eip[XPQ_CALL_COUNT][XPQ_CALL_DEPTH];
} pteinfo_t;
extern pteinfo_t *pteinfo_list;
/* Low-level debug printing; compiled out unless XENDEBUG_LOW is defined. */
#ifdef XENDEBUG_LOW
#define __PRINTK(x) printk x
#else
#define __PRINTK(x)
#endif
/* Parse the Xen-provided command line into the kernel environment / boothowto. */
char *xen_setbootenv(char *cmd_line);
int xen_boothowto(char *envp);
void _xen_machphys_update(vm_paddr_t, vm_paddr_t, char *file, int line);
/* With INVARIANTS, record the call site of every machphys update for debugging. */
#ifdef INVARIANTS
#define xen_machphys_update(a, b) _xen_machphys_update((a), (b), __FILE__, __LINE__)
#else
#define xen_machphys_update(a, b) _xen_machphys_update((a), (b), NULL, 0)
#endif
#ifndef XENHVM
void xen_update_descriptor(union descriptor *, union descriptor *);
#endif
extern struct mtx balloon_lock;
/*
 * NOTE(review): the balloon_lock()/balloon_unlock() macros below shadow
 * the balloon_lock mutex declared above; the mutex-based variant is
 * compiled out (#if 0) and the active stubs merely assign to __flags.
 */
#if 0
#define balloon_lock(__flags) mtx_lock_irqsave(&balloon_lock, __flags)
#define balloon_unlock(__flags) mtx_unlock_irqrestore(&balloon_lock, __flags)
#else
#define balloon_lock(__flags) __flags = 1
#define balloon_unlock(__flags) __flags = 0
#endif
#endif /* _XEN_XENFUNC_H_ */

View File

@ -0,0 +1,227 @@
/*
*
* Copyright (c) 2004 Christian Limpach.
* Copyright (c) 2004,2005 Kip Macy
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Christian Limpach.
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _XEN_XENPMAP_H_
#define _XEN_XENPMAP_H_
#include <machine/xen/features.h>
/* Queued/immediate page-table manipulation primitives (implemented in the pmap code). */
void _xen_queue_pt_update(vm_paddr_t, vm_paddr_t, char *, int);
void xen_pt_switch(vm_paddr_t);
void xen_set_ldt(vm_paddr_t, unsigned long);
void xen_pgdpt_pin(vm_paddr_t);
void xen_pgd_pin(vm_paddr_t);
void xen_pgd_unpin(vm_paddr_t);
void xen_pt_pin(vm_paddr_t);
void xen_pt_unpin(vm_paddr_t);
void xen_flush_queue(void);
void xen_check_queue(void);
#if 0
void pmap_ref(pt_entry_t *pte, vm_paddr_t ma);
#endif
/* With INVARIANTS, tag each queued page-table update with its call site. */
#ifdef INVARIANTS
#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), __FILE__, __LINE__)
#else
#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), NULL, 0)
#endif
/* Page-table debugging hooks; no-ops unless PMAP_DEBUG is defined. */
#ifdef PMAP_DEBUG
#define PMAP_REF pmap_ref
#define PMAP_DEC_REF_PAGE pmap_dec_ref_page
#define PMAP_MARK_PRIV pmap_mark_privileged
#define PMAP_MARK_UNPRIV pmap_mark_unprivileged
#else
#define PMAP_MARK_PRIV(a)
#define PMAP_MARK_UNPRIV(a)
#define PMAP_REF(a, b)
#define PMAP_DEC_REF_PAGE(a)
#endif
/* Set to 1 to force xen_flush_queue() after every PD_SET_* below. */
#define ALWAYS_SYNC 0
#ifdef PT_DEBUG
#define PT_LOG() printk("WP PT_SET %s:%d\n", __FILE__, __LINE__)
#else
#define PT_LOG()
#endif
#define INVALID_P2M_ENTRY (~0UL)
#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */
/* Discriminators for pd_set(): which flavor of page-directory store to perform. */
#define SH_PD_SET_VA 1
#define SH_PD_SET_VA_MA 2
#define SH_PD_SET_VA_CLEAR 3
struct pmap;
void pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type);
#ifdef notyet
/*
 * Translate the virtual address of a page-table entry (a pointer into
 * the recursive PTmap region) to the machine address of that entry by
 * indexing the per-CPU shadow page directory.
 *
 * NOTE(review): 'offset' is computed relative to PTmap but never used,
 * and 'pgindex' is taken from the raw VA (ppte >> PDRSHIFT) rather
 * than from 'offset'.  This looks suspicious -- confirm the intended
 * indexing before enabling (code is currently under #ifdef notyet).
 */
static vm_paddr_t
vptetomachpte(vm_paddr_t *pte)
{
	vm_offset_t offset, ppte;
	vm_paddr_t pgoffset, retval, *pdir_shadow_ptr;
	int pgindex;

	ppte = (vm_offset_t)pte;
	/* Byte offset of the PTE within its page. */
	pgoffset = (ppte & PAGE_MASK);
	offset = ppte - (vm_offset_t)PTmap;
	pgindex = ppte >> PDRSHIFT;

	/* Look up the machine frame backing the PTE's page. */
	pdir_shadow_ptr = (vm_paddr_t *)PCPU_GET(pdir_shadow);
	retval = (pdir_shadow_ptr[pgindex] & ~PAGE_MASK) + pgoffset;
	return (retval);
}
#endif
/*
 * PT_GET(): read a PTE/PDE through a virtual pointer.  A valid entry's
 * machine address is translated back to a physical address via
 * xpmap_mtop(); an invalid entry reads as 0.
 */
#define PT_GET(_ptp) \
	(pmap_valid_entry(*(_ptp)) ? xpmap_mtop(*(_ptp)) : (0))
#ifdef WRITABLE_PAGETABLES
/*
 * Writable page tables: the hypervisor traps and validates direct
 * stores, so PTE-level updates are plain writes.  Note that the 'sync'
 * argument is ignored by the PT_* variants here; only the PD_* (page
 * directory) variants go through pd_set() and honor 'sync'.
 */
#define PT_SET_VA(_ptp,_npte,sync) do { \
	PMAP_REF((_ptp), xpmap_ptom(_npte)); \
	PT_LOG(); \
	*(_ptp) = xpmap_ptom((_npte)); \
} while (/*CONSTCOND*/0)
/* As PT_SET_VA(), but '_npte' is already a machine address. */
#define PT_SET_VA_MA(_ptp,_npte,sync) do { \
	PMAP_REF((_ptp), (_npte)); \
	PT_LOG(); \
	*(_ptp) = (_npte); \
} while (/*CONSTCOND*/0)
#define PT_CLEAR_VA(_ptp, sync) do { \
	PMAP_REF((pt_entry_t *)(_ptp), 0); \
	PT_LOG(); \
	*(_ptp) = 0; \
} while (/*CONSTCOND*/0)
/* Page-directory updates are funnelled through pd_set(). */
#define PD_SET_VA(_pmap, _ptp, _npte, sync) do { \
	PMAP_REF((_ptp), xpmap_ptom(_npte)); \
	pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA); \
	if (sync || ALWAYS_SYNC) xen_flush_queue(); \
} while (/*CONSTCOND*/0)
#define PD_SET_VA_MA(_pmap, _ptp, _npte, sync) do { \
	PMAP_REF((_ptp), (_npte)); \
	pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA_MA); \
	if (sync || ALWAYS_SYNC) xen_flush_queue(); \
} while (/*CONSTCOND*/0)
#define PD_CLEAR_VA(_pmap, _ptp, sync) do { \
	PMAP_REF((pt_entry_t *)(_ptp), 0); \
	pd_set((_pmap),(_ptp), 0, SH_PD_SET_VA_CLEAR); \
	if (sync || ALWAYS_SYNC) xen_flush_queue(); \
} while (/*CONSTCOND*/0)
#else /* !WRITABLE_PAGETABLES */
/*
 * Without writable page tables every update is queued as an explicit
 * hypervisor multicall via xen_queue_pt_update(); 'sync' (or
 * ALWAYS_SYNC) forces the queue to be flushed immediately.
 */
#define PT_SET_VA(_ptp,_npte,sync) do { \
	PMAP_REF((_ptp), xpmap_ptom(_npte)); \
	xen_queue_pt_update(vtomach(_ptp), \
			    xpmap_ptom(_npte)); \
	if (sync || ALWAYS_SYNC) xen_flush_queue(); \
} while (/*CONSTCOND*/0)
/* As PT_SET_VA(), but '_npte' is already a machine address. */
#define PT_SET_VA_MA(_ptp,_npte,sync) do { \
	PMAP_REF((_ptp), (_npte)); \
	xen_queue_pt_update(vtomach(_ptp), _npte); \
	if (sync || ALWAYS_SYNC) xen_flush_queue(); \
} while (/*CONSTCOND*/0)
#define PT_CLEAR_VA(_ptp, sync) do { \
	PMAP_REF((pt_entry_t *)(_ptp), 0); \
	xen_queue_pt_update(vtomach(_ptp), 0); \
	if (sync || ALWAYS_SYNC) \
		xen_flush_queue(); \
} while (/*CONSTCOND*/0)
/*
 * NOTE(review): the PD_* variants below take '_ptepindex' but pass
 * '_ptp' to PMAP_REF(); '_ptp' is not a parameter here.  This is
 * harmless unless PMAP_DEBUG is defined (PMAP_REF expands to nothing
 * otherwise) but would fail to compile with PMAP_DEBUG -- confirm and
 * fix the intended argument before enabling PMAP_DEBUG.
 */
#define PD_SET_VA(_pmap, _ptepindex,_npte,sync) do { \
	PMAP_REF((_ptp), xpmap_ptom(_npte)); \
	pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA); \
	if (sync || ALWAYS_SYNC) xen_flush_queue(); \
} while (/*CONSTCOND*/0)
#define PD_SET_VA_MA(_pmap, _ptepindex,_npte,sync) do { \
	PMAP_REF((_ptp), (_npte)); \
	pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA_MA); \
	if (sync || ALWAYS_SYNC) xen_flush_queue(); \
} while (/*CONSTCOND*/0)
#define PD_CLEAR_VA(_pmap, _ptepindex, sync) do { \
	PMAP_REF((pt_entry_t *)(_ptp), 0); \
	pd_set((_pmap),(_ptepindex), 0, SH_PD_SET_VA_CLEAR); \
	if (sync || ALWAYS_SYNC) xen_flush_queue(); \
} while (/*CONSTCOND*/0)
#endif
/*
 * PT_SET_MA(): map a virtual address to an explicit machine address
 * with a single HYPERVISOR_update_va_mapping() hypercall, invalidating
 * the TLB entry on all vCPUs (UVMF_INVLPG | UVMF_ALL).  Panics if the
 * hypercall fails.
 */
#define PT_SET_MA(_va, _ma) \
do { \
	PANIC_IF(HYPERVISOR_update_va_mapping(((unsigned long)(_va)),\
	    (_ma), \
	    UVMF_INVLPG| UVMF_ALL) < 0); \
} while (/*CONSTCOND*/0)
/* Flush any page-table updates still queued for the hypervisor. */
#define PT_UPDATES_FLUSH() do { \
	xen_flush_queue(); \
} while (/*CONSTCOND*/0)
/*
 * Machine-to-physical address translation: run the frame bits through
 * machtophys() and carry the in-page offset bits across unchanged.
 */
static __inline vm_paddr_t
xpmap_mtop(vm_paddr_t mpa)
{

	return (machtophys(mpa & PG_FRAME) | (mpa & ~PG_FRAME));
}
/*
 * Physical-to-machine address translation: run the frame bits through
 * phystomach() and carry the in-page offset bits across unchanged.
 */
static __inline vm_paddr_t
xpmap_ptom(vm_paddr_t ppa)
{

	return (phystomach(ppa & PG_FRAME) | (ppa & ~PG_FRAME));
}
/*
 * Record the machine frame number backing physical page 'pfn' in the
 * phys-to-machine table.  When the physmap is auto-translated the
 * hypervisor maintains the mapping itself, so the store is skipped.
 */
static __inline void
set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
#ifdef notyet
	PANIC_IF(max_mapnr && pfn >= max_mapnr);
#endif
	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
		xen_phys_machine[pfn] = mfn;
		return;
	}
#ifdef notyet
	/* Auto-translated guests may only record identity or invalid. */
	PANIC_IF((pfn != mfn && mfn != INVALID_P2M_ENTRY));
#endif
}
#endif /* _XEN_XENPMAP_H_ */

View File

@ -0,0 +1,122 @@
/*
* Copyright (c) 2008 Kip Macy
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*
* $FreeBSD$
*/
#ifndef XENVAR_H_
#define XENVAR_H_
/* Debug categories tested against xendebug_flags by TRACE_DEBUG(). */
#define XBOOTUP 0x1
#define XPMAP 0x2
extern int xendebug_flags;
/*
 * XENPRINTF goes to printk unless NOXENDEBUG is defined, in which case
 * the plain kernel printf is used instead.
 */
#ifndef NOXENDEBUG
#define XENPRINTF printk
#else
#define XENPRINTF printf
#endif
#include <xen/features.h>
/* Function-tracing helpers; currently compiled out (#if 0). */
#if 0
#define TRACE_ENTER XENPRINTF("(file=%s, line=%d) entered %s\n", __FILE__, __LINE__, __FUNCTION__)
#define TRACE_EXIT XENPRINTF("(file=%s, line=%d) exiting %s\n", __FILE__, __LINE__, __FUNCTION__)
#define TRACE_DEBUG(argflags, _f, _a...) \
if (xendebug_flags & argflags) XENPRINTF("(file=%s, line=%d) " _f "\n", __FILE__, __LINE__, ## _a);
#else
#define TRACE_ENTER
#define TRACE_EXIT
#define TRACE_DEBUG(argflags, _f, _a...)
#endif
#ifdef XENHVM
/*
 * In an HVM guest the hypervisor maintains the guest-physical to
 * machine translation transparently, so from the kernel's point of
 * view physical and machine addresses coincide: the translation
 * helpers below are identities and the P2M maintenance hooks are
 * no-ops.
 */
static inline vm_paddr_t
phystomach(vm_paddr_t pa)
{
	return (pa);
}
static inline vm_paddr_t
machtophys(vm_paddr_t ma)
{
	return (ma);
}
/* Virtual-to-machine is just virtual-to-physical in HVM mode. */
#define vtomach(va) pmap_kextract((vm_offset_t) (va))
#define PFNTOMFN(pa) (pa)
#define MFNTOPFN(ma) (ma)
/* No P2M table to maintain; every pfn is considered mapped. */
#define set_phys_to_machine(pfn, mfn) ((void)0)
#define phys_to_machine_mapping_valid(pfn) (TRUE)
#define PT_UPDATES_FLUSH() ((void)0)
#else
/* PV guest: explicit phys-to-machine and machine-to-phys tables. */
extern xen_pfn_t *xen_phys_machine;
extern xen_pfn_t *xen_machine_phys;
/* Xen starts physical pages after the 4MB ISA hole -
 * FreeBSD doesn't
 */
#undef ADD_ISA_HOLE /* XXX */
#ifdef ADD_ISA_HOLE
#define ISA_INDEX_OFFSET 1024
#define ISA_PDR_OFFSET 1
#else
#define ISA_INDEX_OFFSET 0
#define ISA_PDR_OFFSET 0
#endif
/* Frame-number translation through the tables above. */
#define PFNTOMFN(i) (xen_phys_machine[(i)])
#define MFNTOPFN(i) ((vm_paddr_t)xen_machine_phys[(i)])
/* Kernel virtual <-> physical conversions via the KERNBASE offset. */
#define VTOP(x) ((((uintptr_t)(x))) - KERNBASE)
#define PTOV(x) (((uintptr_t)(x)) + KERNBASE)
#define VTOPFN(x) (VTOP(x) >> PAGE_SHIFT)
#define PFNTOV(x) PTOV((vm_paddr_t)(x) << PAGE_SHIFT)
#define VTOMFN(va) (vtomach(va) >> PAGE_SHIFT)
/* Round a byte count up to whole pages. */
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
/* Full-address translations built from the frame-number macros. */
#define phystomach(pa) (((vm_paddr_t)(PFNTOMFN((pa) >> PAGE_SHIFT))) << PAGE_SHIFT)
#define machtophys(ma) (((vm_paddr_t)(MFNTOPFN((ma) >> PAGE_SHIFT))) << PAGE_SHIFT)
#endif
void xpq_init(void);
int xen_create_contiguous_region(vm_page_t pages, int npages);
void xen_destroy_contiguous_region(void * addr, int npages);
#endif

View File

@ -2758,21 +2758,24 @@ gnu/fs/xfs/xfs_iomap.c optional xfs \
gnu/fs/xfs/xfs_behavior.c optional xfs \
compile-with "${NORMAL_C} -I$S/gnu/fs/xfs/FreeBSD -I$S/gnu/fs/xfs/FreeBSD/support -I$S/gnu/fs/xfs"
xen/gnttab.c optional xen
xen/features.c optional xen
xen/evtchn/evtchn.c optional xen
xen/evtchn/evtchn_dev.c optional xen
xen/xenbus/xenbus_client.c optional xen
xen/xenbus/xenbus_comms.c optional xen
xen/xenbus/xenbus_dev.c optional xen
xen/xenbus/xenbus_if.m optional xen
xen/xenbus/xenbus_probe.c optional xen
#xen/xenbus/xenbus_probe_backend.c optional xen
xen/xenbus/xenbus_xs.c optional xen
dev/xen/console/console.c optional xen
dev/xen/console/xencons_ring.c optional xen
dev/xen/blkfront/blkfront.c optional xen
dev/xen/netfront/netfront.c optional xen
#dev/xen/xenpci/xenpci.c optional xen
#xen/xenbus/xenbus_newbus.c optional xenhvm
xen/gnttab.c optional xen | xenhvm
xen/features.c optional xen | xenhvm
xen/evtchn/evtchn.c optional xen
xen/evtchn/evtchn_dev.c optional xen | xenhvm
xen/reboot.c optional xen
xen/xenbus/xenbus_client.c optional xen | xenhvm
xen/xenbus/xenbus_comms.c optional xen | xenhvm
xen/xenbus/xenbus_dev.c optional xen | xenhvm
xen/xenbus/xenbus_if.m optional xen | xenhvm
xen/xenbus/xenbus_probe.c optional xen | xenhvm
#xen/xenbus/xenbus_probe_backend.c optional xen
xen/xenbus/xenbus_xs.c optional xen | xenhvm
dev/xen/balloon/balloon.c optional xen | xenhvm
dev/xen/console/console.c optional xen
dev/xen/console/xencons_ring.c optional xen
dev/xen/blkfront/blkfront.c optional xen | xenhvm
dev/xen/netfront/netfront.c optional xen | xenhvm
dev/xen/xenpci/xenpci.c optional xenpci
dev/xen/xenpci/evtchn.c optional xenpci
dev/xen/xenpci/machine_reboot.c optional xenpci

View File

@ -57,3 +57,5 @@ KDTRACE_FRAME opt_kdtrace.h
# BPF just-in-time compiler
BPF_JITTER opt_bpf.h
XENHVM opt_global.h

View File

@ -34,11 +34,24 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <machine/hypervisor-ifs.h>
#include <machine/xen-os.h>
#include <machine/xenbus.h>
#include <machine/xen/xen-os.h>
#include <machine/xen/xenfunc.h>
#include <machine/xen/xenvar.h>
#include <xen/hypervisor.h>
#include <xen/xenbus/xenbusvar.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver");
struct mtx balloon_mutex;
/*
* Protects atomic reservation decrease/increase against concurrent increases.
@ -46,23 +59,44 @@ __FBSDID("$FreeBSD$");
* balloon lists.
*/
struct mtx balloon_lock;
#ifdef notyet
/* We aim for 'current allocation' == 'target allocation'. */
static unsigned long current_pages;
static unsigned long target_pages;
/* We increase/decrease in batches which fit in a page */
static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
#define ARRAY_SIZE(A) (sizeof(A) / sizeof(A[0]))
/* VM /proc information for memory */
extern unsigned long totalram_pages;
struct balloon_stats {
/* We aim for 'current allocation' == 'target allocation'. */
unsigned long current_pages;
unsigned long target_pages;
/* We may hit the hard limit in Xen. If we do then we remember it. */
unsigned long hard_limit;
/*
* Drivers may alter the memory reservation independently, but they
* must inform the balloon driver so we avoid hitting the hard limit.
*/
unsigned long driver_pages;
/* Number of pages in high- and low-memory balloons. */
unsigned long balloon_low;
unsigned long balloon_high;
};
/* We may hit the hard limit in Xen. If we do then we remember it. */
static unsigned long hard_limit;
static struct balloon_stats balloon_stats;
#define bs balloon_stats
/*
* Drivers may alter the memory reservation independently, but they must
* inform the balloon driver so that we can avoid hitting the hard limit.
*/
static unsigned long driver_pages;
SYSCTL_DECL(_dev_xen);
SYSCTL_NODE(_dev_xen, OID_AUTO, balloon, CTLFLAG_RD, NULL, "Balloon");
SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, current, CTLFLAG_RD,
&bs.current_pages, 0, "Current allocation");
SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, target, CTLFLAG_RD,
&bs.target_pages, 0, "Target allocation");
SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, driver_pages, CTLFLAG_RD,
&bs.driver_pages, 0, "Driver pages");
SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, hard_limit, CTLFLAG_RD,
&bs.hard_limit, 0, "Xen hard limit");
SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD,
&bs.balloon_low, 0, "Low-mem balloon");
SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD,
&bs.balloon_high, 0, "High-mem balloon");
struct balloon_entry {
vm_page_t page;
@ -72,9 +106,6 @@ struct balloon_entry {
/* List of ballooned pages, threaded through the mem_map array. */
static STAILQ_HEAD(,balloon_entry) ballooned_pages;
static unsigned long balloon_low, balloon_high;
/* Main work function, always executed in process context. */
static void balloon_process(void *unused);
@ -89,10 +120,10 @@ balloon_append(vm_page_t page)
{
struct balloon_entry *entry;
entry = malloc(sizeof(struct balloon_entry), M_WAITOK);
entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_WAITOK);
entry->page = page;
STAILQ_INSERT_HEAD(&ballooned_pages, entry, list);
balloon_low++;
bs.balloon_low++;
}
/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
@ -111,13 +142,13 @@ balloon_retrieve(void)
page = entry->page;
free(entry, M_DEVBUF);
balloon_low--;
bs.balloon_low--;
return page;
}
static void
balloon_alarm(unsigned long unused)
balloon_alarm(void *unused)
{
wakeup(balloon_process);
}
@ -125,17 +156,56 @@ balloon_alarm(unsigned long unused)
static unsigned long
current_target(void)
{
unsigned long target = min(target_pages, hard_limit);
if (target > (current_pages + balloon_low + balloon_high))
target = current_pages + balloon_low + balloon_high;
unsigned long target = min(bs.target_pages, bs.hard_limit);
if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high))
target = bs.current_pages + bs.balloon_low + bs.balloon_high;
return target;
}
static unsigned long
minimum_target(void)
{
#ifdef XENHVM
#define max_pfn physmem
#endif
unsigned long min_pages, curr_pages = current_target();
#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
/* Simple continuous piecewiese linear function:
* max MiB -> min MiB gradient
* 0 0
* 16 16
* 32 24
* 128 72 (1/2)
* 512 168 (1/4)
* 2048 360 (1/8)
* 8192 552 (1/32)
* 32768 1320
* 131072 4392
*/
if (max_pfn < MB2PAGES(128))
min_pages = MB2PAGES(8) + (max_pfn >> 1);
else if (max_pfn < MB2PAGES(512))
min_pages = MB2PAGES(40) + (max_pfn >> 2);
else if (max_pfn < MB2PAGES(2048))
min_pages = MB2PAGES(104) + (max_pfn >> 3);
else
min_pages = MB2PAGES(296) + (max_pfn >> 5);
#undef MB2PAGES
/* Don't enforce growth */
return min(min_pages, curr_pages);
#ifndef CONFIG_XEN
#undef max_pfn
#endif
}
static int
increase_reservation(unsigned long nr_pages)
{
unsigned long *mfn_list, pfn, i, flags;
struct page *page;
unsigned long pfn, i;
struct balloon_entry *entry;
vm_page_t page;
long rc;
struct xen_memory_reservation reservation = {
.address_bits = 0,
@ -143,64 +213,81 @@ increase_reservation(unsigned long nr_pages)
.domid = DOMID_SELF
};
if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
nr_pages = PAGE_SIZE / sizeof(unsigned long);
if (nr_pages > ARRAY_SIZE(frame_list))
nr_pages = ARRAY_SIZE(frame_list);
mfn_list = (unsigned long *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
if (mfn_list == NULL)
return ENOMEM;
mtx_lock(&balloon_lock);
for (entry = STAILQ_FIRST(&ballooned_pages), i = 0;
i < nr_pages; i++, entry = STAILQ_NEXT(entry, list)) {
KASSERT(entry, ("ballooned_pages list corrupt"));
page = entry->page;
frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
}
reservation.extent_start = mfn_list;
set_xen_guest_handle(reservation.extent_start, frame_list);
reservation.nr_extents = nr_pages;
rc = HYPERVISOR_memory_op(
XENMEM_increase_reservation, &reservation);
XENMEM_populate_physmap, &reservation);
if (rc < nr_pages) {
int ret;
/* We hit the Xen hard limit: reprobe. */
reservation.extent_start = mfn_list;
reservation.nr_extents = rc;
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
&reservation);
PANIC_IF(ret != rc);
hard_limit = current_pages + rc - driver_pages;
if (rc > 0) {
int ret;
/* We hit the Xen hard limit: reprobe. */
reservation.nr_extents = rc;
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
&reservation);
KASSERT(ret == rc, ("HYPERVISOR_memory_op failed"));
}
if (rc >= 0)
bs.hard_limit = (bs.current_pages + rc -
bs.driver_pages);
goto out;
}
for (i = 0; i < nr_pages; i++) {
page = balloon_retrieve();
PANIC_IF(page == NULL);
KASSERT(page, ("balloon_retrieve failed"));
pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
PANIC_IF(phys_to_machine_mapping_valid(pfn));
KASSERT((xen_feature(XENFEAT_auto_translated_physmap) ||
!phys_to_machine_mapping_valid(pfn)),
("auto translated physmap but mapping is valid"));
set_phys_to_machine(pfn, frame_list[i]);
#ifndef XENHVM
/* Link back into the page tables if not highmem. */
if (pfn < max_low_pfn) {
int ret;
ret = HYPERVISOR_update_va_mapping(
(unsigned long)__va(pfn << PAGE_SHIFT),
pfn_pte_ma(frame_list[i], PAGE_KERNEL),
0);
PASSING(ret == 0,
("HYPERVISOR_update_va_mapping failed"));
}
#endif
/* Update P->M and M->P tables. */
PFNTOMFN(pfn) = mfn_list[i];
xen_machphys_update(mfn_list[i], pfn);
/* Relinquish the page back to the allocator. */
ClearPageReserved(page);
set_page_count(page, 1);
vm_page_unwire(page, 0);
vm_page_free(page);
}
current_pages += nr_pages;
totalram_pages = current_pages;
bs.current_pages += nr_pages;
//totalram_pages = bs.current_pages;
out:
balloon_unlock(flags);
free((mfn_list);
mtx_unlock(&balloon_lock);
return 0;
}
static int
static int
decrease_reservation(unsigned long nr_pages)
{
unsigned long *mfn_list, pfn, i, flags;
struct page *page;
void *v;
unsigned long pfn, i;
vm_page_t page;
int need_sleep = 0;
int ret;
struct xen_memory_reservation reservation = {
@ -209,48 +296,68 @@ decrease_reservation(unsigned long nr_pages)
.domid = DOMID_SELF
};
if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
nr_pages = PAGE_SIZE / sizeof(unsigned long);
mfn_list = (unsigned long *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
if (mfn_list == NULL)
return ENOMEM;
if (nr_pages > ARRAY_SIZE(frame_list))
nr_pages = ARRAY_SIZE(frame_list);
for (i = 0; i < nr_pages; i++) {
int color = 0;
if ((page = vm_page_alloc(NULL, color++,
VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
nr_pages = i;
need_sleep = 1;
break;
}
pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
mfn_list[i] = PFNTOMFN(pfn);
frame_list[i] = PFNTOMFN(pfn);
#if 0
if (!PageHighMem(page)) {
v = phys_to_virt(pfn << PAGE_SHIFT);
scrub_pages(v, 1);
#ifdef CONFIG_XEN
ret = HYPERVISOR_update_va_mapping(
(unsigned long)v, __pte_ma(0), 0);
BUG_ON(ret);
#endif
}
#endif
#ifdef CONFIG_XEN_SCRUB_PAGES
else {
v = kmap(page);
scrub_pages(v, 1);
kunmap(page);
}
#endif
}
balloon_lock(flags);
#ifdef CONFIG_XEN
/* Ensure that ballooned highmem pages don't have kmaps. */
kmap_flush_unused();
flush_tlb_all();
#endif
mtx_lock(&balloon_lock);
/* No more mappings: invalidate P2M and add to balloon. */
for (i = 0; i < nr_pages; i++) {
pfn = MFNTOPFN(mfn_list[i]);
PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
pfn = MFNTOPFN(frame_list[i]);
set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
balloon_append(PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT));
}
reservation.extent_start = mfn_list;
set_xen_guest_handle(reservation.extent_start, frame_list);
reservation.nr_extents = nr_pages;
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
PANIC_IF(ret != nr_pages);
KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed"));
current_pages -= nr_pages;
totalram_pages = current_pages;
bs.current_pages -= nr_pages;
//totalram_pages = bs.current_pages;
balloon_unlock(flags);
mtx_unlock(&balloon_lock);
free(mfn_list, M_DEVBUF);
return need_sleep;
return (need_sleep);
}
/*
@ -265,27 +372,24 @@ balloon_process(void *unused)
int need_sleep = 0;
long credit;
mtx_lock(&balloon_mutex);
for (;;) {
do {
credit = current_target() - current_pages;
credit = current_target() - bs.current_pages;
if (credit > 0)
need_sleep = (increase_reservation(credit) != 0);
if (credit < 0)
need_sleep = (decrease_reservation(-credit) != 0);
#ifndef CONFIG_PREEMPT
if (need_resched())
schedule();
#endif
} while ((credit != 0) && !need_sleep);
/* Schedule more work if there is some still to be done. */
if (current_target() != current_pages)
timeout(balloon_alarm, NULL, ticks + HZ);
if (current_target() != bs.current_pages)
timeout(balloon_alarm, NULL, ticks + hz);
msleep(balloon_process, balloon_lock, 0, "balloon", -1);
msleep(balloon_process, &balloon_mutex, 0, "balloon", -1);
}
mtx_unlock(&balloon_mutex);
}
/* Resets the Xen limit, sets new target, and kicks off processing. */
@ -293,8 +397,8 @@ static void
set_new_target(unsigned long target)
{
/* No need for lock. Not read-modify-write updates. */
hard_limit = ~0UL;
target_pages = target;
bs.hard_limit = ~0UL;
bs.target_pages = max(target, minimum_target());
wakeup(balloon_process);
}
@ -311,8 +415,9 @@ watch_target(struct xenbus_watch *watch,
unsigned long long new_target;
int err;
err = xenbus_scanf(NULL, "memory", "target", "%llu", &new_target);
if (err != 1) {
err = xenbus_scanf(XBT_NIL, "memory", "target", NULL,
"%llu", &new_target);
if (err) {
/* This is ok (for domain0 at least) - so just return */
return;
}
@ -325,7 +430,7 @@ watch_target(struct xenbus_watch *watch,
}
static void
balloon_init_watcher(void *)
balloon_init_watcher(void *arg)
{
int err;
@ -334,48 +439,60 @@ balloon_init_watcher(void *)
printf("Failed to set balloon watcher\n");
}
SYSINIT(balloon_init_watcher, SI_SUB_PSEUDO, SI_ORDER_ANY,
balloon_init_watcher, NULL);
static void
balloon_init(void *)
balloon_init(void *arg)
{
unsigned long pfn;
struct page *page;
#ifndef XENHVM
vm_page_t page;
#endif
IPRINTK("Initialising balloon driver.\n");
if (!is_running_on_xen())
return;
if (xen_init() < 0)
return -1;
mtx_init(&balloon_lock, "balloon_lock", NULL, MTX_DEF);
mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF);
current_pages = min(xen_start_info->nr_pages, max_pfn);
target_pages = current_pages;
balloon_low = 0;
balloon_high = 0;
driver_pages = 0UL;
hard_limit = ~0UL;
#ifndef XENHVM
bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
#else
bs.current_pages = physmem;
#endif
bs.target_pages = bs.current_pages;
bs.balloon_low = 0;
bs.balloon_high = 0;
bs.driver_pages = 0UL;
bs.hard_limit = ~0UL;
init_timer(&balloon_timer);
balloon_timer.data = 0;
balloon_timer.function = balloon_alarm;
kproc_create(balloon_process, NULL, NULL, 0, 0, "balloon");
// init_timer(&balloon_timer);
// balloon_timer.data = 0;
// balloon_timer.function = balloon_alarm;
#ifndef XENHVM
/* Initialise the balloon with excess memory space. */
for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT);
balloon_append(page);
}
#endif
target_watch.callback = watch_target;
return 0;
return;
}
SYSINIT(balloon_init, SI_SUB_PSEUDO, SI_ORDER_ANY, balloon_init, NULL);
void balloon_update_driver_allowance(long delta);
void
balloon_update_driver_allowance(long delta)
{
unsigned long flags;
balloon_lock(flags);
driver_pages += delta;
balloon_unlock(flags);
mtx_lock(&balloon_lock);
bs.driver_pages += delta;
mtx_unlock(&balloon_lock);
}
#if 0
@ -393,17 +510,18 @@ static int dealloc_pte_fn(
set_pte_at(&init_mm, addr, pte, __pte_ma(0));
set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
PANIC_IF(ret != 1);
KASSERT(ret == 1, ("HYPERVISOR_memory_op failed"));
return 0;
}
#endif
#if 0
vm_page_t
balloon_alloc_empty_page_range(unsigned long nr_pages)
{
unsigned long flags;
vm_page_t pages;
int i;
int i, rc;
unsigned long *mfn_list;
struct xen_memory_reservation reservation = {
.address_bits = 0,
@ -422,7 +540,9 @@ balloon_alloc_empty_page_range(unsigned long nr_pages)
PFNTOMFN(i) = INVALID_P2M_ENTRY;
reservation.extent_start = mfn_list;
reservation.nr_extents = nr_pages;
PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != nr_pages);
rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
&reservation);
KASSERT(rc == nr_pages, ("HYPERVISOR_memory_op failed"));
}
current_pages -= nr_pages;
@ -435,12 +555,11 @@ balloon_alloc_empty_page_range(unsigned long nr_pages)
void
balloon_dealloc_empty_page_range(vm_page_t page, unsigned long nr_pages)
{
unsigned long i, flags;
unsigned long i;
for (i = 0; i < nr_pages; i++)
balloon_append(page + i);
wakeup(balloon_process);
}
#endif

View File

@ -40,17 +40,17 @@ __FBSDID("$FreeBSD$");
#include <machine/intr_machdep.h>
#include <machine/vmparam.h>
#include <xen/hypervisor.h>
#include <machine/xen/xen-os.h>
#include <machine/xen/xenfunc.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/protocols.h>
#include <xen/xenbus/xenbusvar.h>
#include <geom/geom_disk.h>
#include <machine/xen/xenfunc.h>
#include <xen/gnttab.h>
#include <dev/xen/blkfront/block.h>
@ -106,7 +106,7 @@ static char * blkif_status_name[] = {
#endif
#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args)
#if 0
#define DPRINTK(fmt, args...) printf("[XEN] %s:%d" fmt ".\n", __FUNCTION__, __LINE__,##args)
#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif
@ -138,7 +138,6 @@ pfn_to_mfn(vm_paddr_t pfn)
return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT);
}
/*
* Translate Linux major/minor to an appropriate name and unit
* number. For HVM guests, this allows us to use the same drive names
@ -323,17 +322,17 @@ blkfront_probe(device_t dev)
static int
blkfront_attach(device_t dev)
{
int err, vdevice, i, unit;
int error, vdevice, i, unit;
struct blkfront_info *info;
const char *name;
/* FIXME: Use dynamic device id if this is not set. */
err = xenbus_scanf(XBT_NIL, xenbus_get_node(dev),
error = xenbus_scanf(XBT_NIL, xenbus_get_node(dev),
"virtual-device", NULL, "%i", &vdevice);
if (err) {
xenbus_dev_fatal(dev, err, "reading virtual-device");
if (error) {
xenbus_dev_fatal(dev, error, "reading virtual-device");
printf("couldn't find virtual device");
return (err);
return (error);
}
blkfront_vdevice_to_unit(vdevice, &unit, &name);
@ -362,9 +361,22 @@ blkfront_attach(device_t dev)
/* Front end dir is a number, which is used as the id. */
info->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0);
err = talk_to_backend(dev, info);
if (err)
return (err);
error = talk_to_backend(dev, info);
if (error)
return (error);
return (0);
}
static int
blkfront_suspend(device_t dev)
{
struct blkfront_info *info = device_get_softc(dev);
/* Prevent new requests being issued until we fix things up. */
mtx_lock(&blkif_io_lock);
info->connected = BLKIF_STATE_SUSPENDED;
mtx_unlock(&blkif_io_lock);
return (0);
}
@ -375,16 +387,14 @@ blkfront_resume(device_t dev)
struct blkfront_info *info = device_get_softc(dev);
int err;
DPRINTK("blkfront_resume: %s\n", dev->nodename);
DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));
blkif_free(info, 1);
err = talk_to_backend(dev, info);
if (info->connected == BLKIF_STATE_SUSPENDED && !err)
blkif_recover(info);
return err;
return (err);
}
/* Common code used when first setting up, and when resuming. */
@ -425,6 +435,7 @@ talk_to_backend(device_t dev, struct blkfront_info *info)
message = "writing protocol";
goto abort_transaction;
}
err = xenbus_transaction_end(xbt, 0);
if (err) {
if (err == EAGAIN)
@ -462,8 +473,8 @@ setup_blkring(device_t dev, struct blkfront_info *info)
SHARED_RING_INIT(sring);
FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
error = xenbus_grant_ring(dev, (vtomach(info->ring.sring) >> PAGE_SHIFT),
&info->ring_ref);
error = xenbus_grant_ring(dev,
(vtomach(info->ring.sring) >> PAGE_SHIFT), &info->ring_ref);
if (error) {
free(sring, M_DEVBUF);
info->ring.sring = NULL;
@ -471,11 +482,11 @@ setup_blkring(device_t dev, struct blkfront_info *info)
}
error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev),
"xbd", (driver_intr_t *)blkif_int, info,
INTR_TYPE_BIO | INTR_MPSAFE, &info->irq);
"xbd", (driver_intr_t *)blkif_int, info,
INTR_TYPE_BIO | INTR_MPSAFE, &info->irq);
if (error) {
xenbus_dev_fatal(dev, error,
"bind_evtchn_to_irqhandler failed");
"bind_evtchn_to_irqhandler failed");
goto fail;
}
@ -494,7 +505,7 @@ blkfront_backend_changed(device_t dev, XenbusState backend_state)
{
struct blkfront_info *info = device_get_softc(dev);
DPRINTK("blkfront:backend_changed.\n");
DPRINTK("backend_state=%d\n", backend_state);
switch (backend_state) {
case XenbusStateUnknown:
@ -707,7 +718,7 @@ blkif_open(struct disk *dp)
struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
if (sc == NULL) {
printk("xb%d: not found", sc->xb_unit);
printf("xb%d: not found", sc->xb_unit);
return (ENXIO);
}
@ -1019,9 +1030,11 @@ blkif_recover(struct blkfront_info *info)
blkif_request_t *req;
struct blk_shadow *copy;
if (!info->sc)
return;
/* Stage 1: Make a safe copy of the shadow state. */
copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO);
PANIC_IF(copy == NULL);
memcpy(copy, info->shadow, sizeof(info->shadow));
/* Stage 2: Set up free list. */
@ -1084,7 +1097,7 @@ static device_method_t blkfront_methods[] = {
DEVMETHOD(device_attach, blkfront_attach),
DEVMETHOD(device_detach, blkfront_detach),
DEVMETHOD(device_shutdown, bus_generic_shutdown),
DEVMETHOD(device_suspend, bus_generic_suspend),
DEVMETHOD(device_suspend, blkfront_suspend),
DEVMETHOD(device_resume, blkfront_resume),
/* Xenbus interface */

View File

@ -5,6 +5,7 @@ __FBSDID("$FreeBSD$");
#include <sys/module.h>
#include <sys/systm.h>
#include <sys/consio.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/uio.h>
#include <sys/tty.h>
@ -18,7 +19,7 @@ __FBSDID("$FreeBSD$");
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <sys/cons.h>
#include <sys/priv.h>
#include <sys/kdb.h>
#include <sys/proc.h>
#include <dev/xen/console/xencons_ring.h>
@ -125,12 +126,17 @@ xccngetc(struct consdev *dev)
return 0;
do {
if ((c = xccncheckc(dev)) == -1) {
/* polling without sleeping in Xen doesn't work well.
* Sleeping gives other things like clock a chance to
* run
*/
tsleep(&cn_mtx, PWAIT | PCATCH, "console sleep",
XC_POLLTIME);
#ifdef KDB
if (!kdb_active)
#endif
/*
* Polling without sleeping in Xen
* doesn't work well. Sleeping gives
* other things like clock a chance to
* run
*/
tsleep(&cn_mtx, PWAIT | PCATCH,
"console sleep", XC_POLLTIME);
}
} while(c == -1);
return c;
@ -140,11 +146,13 @@ int
xccncheckc(struct consdev *dev)
{
int ret = (xc_mute ? 0 : -1);
if (xencons_has_input())
xencons_handle_input(NULL);
if (xencons_has_input())
xencons_handle_input(NULL);
CN_LOCK(cn_mtx);
if ((rp - rc)) {
if (kdb_active) printf("%s:%d\n", __func__, __LINE__);
/* we need to return only one char */
ret = (int)rbuf[RBUF_MASK(rc)];
rc++;
@ -235,17 +243,16 @@ xc_attach(device_t dev)
if (xen_start_info->flags & SIF_INITDOMAIN) {
error = bind_virq_to_irqhandler(
VIRQ_CONSOLE,
0,
"console",
NULL,
xencons_priv_interrupt,
sc, INTR_TYPE_TTY, NULL);
VIRQ_CONSOLE,
0,
"console",
NULL,
xencons_priv_interrupt,
INTR_TYPE_TTY, NULL);
KASSERT(error >= 0, ("can't register console interrupt"));
}
/* register handler to flush console on shutdown */
if ((EVENTHANDLER_REGISTER(shutdown_post_sync, xc_shutdown,
NULL, SHUTDOWN_PRI_DEFAULT)) == NULL)
@ -270,7 +277,11 @@ xencons_rx(char *buf, unsigned len)
int i;
struct tty *tp = xccons;
if (xen_console_up) {
if (xen_console_up
#ifdef DDB
&& !kdb_active
#endif
) {
tty_lock(tp);
for (i = 0; i < len; i++)
ttydisc_rint(tp, buf[i], 0);
@ -423,12 +434,3 @@ xcons_force_flush(void)
}
DRIVER_MODULE(xc, nexus, xc_driver, xc_devclass, 0, 0);
/*
* Local variables:
* mode: C
* c-set-style: "BSD"
* c-basic-offset: 8
* tab-width: 4
* indent-tabs-mode: t
* End:
*/

View File

@ -13,19 +13,24 @@ __FBSDID("$FreeBSD$");
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/cons.h>
#include <machine/stdarg.h>
#include <machine/xen/xen-os.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <sys/cons.h>
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
#include <xen/interface/io/console.h>
#include <dev/xen/console/xencons_ring.h>
#include <xen/evtchn.h>
#include <xen/interface/io/console.h>
#define console_evtchn console.domU.evtchn
static unsigned int console_irq;
extern char *console_page;
extern struct mtx cn_mtx;
@ -60,7 +65,8 @@ xencons_ring_send(const char *data, unsigned len)
sent = 0;
mb();
PANIC_IF((prod - cons) > sizeof(intf->out));
KASSERT((prod - cons) <= sizeof(intf->out),
("console send ring inconsistent"));
while ((sent < len) && ((prod - cons) < sizeof(intf->out)))
intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++];
@ -119,15 +125,18 @@ xencons_ring_init(void)
return 0;
err = bind_caller_port_to_irqhandler(xen_start_info->console_evtchn,
"xencons", xencons_handle_input, NULL,
INTR_TYPE_MISC | INTR_MPSAFE, NULL);
"xencons", xencons_handle_input, NULL,
INTR_TYPE_MISC | INTR_MPSAFE, &console_irq);
if (err) {
return err;
}
return 0;
}
#ifdef notyet
extern void xencons_suspend(void);
extern void xencons_resume(void);
void
xencons_suspend(void)
{
@ -135,7 +144,7 @@ xencons_suspend(void)
if (!xen_start_info->console_evtchn)
return;
unbind_evtchn_from_irqhandler(xen_start_info->console_evtchn, NULL);
unbind_from_irqhandler(console_irq);
}
void
@ -144,7 +153,7 @@ xencons_resume(void)
(void)xencons_ring_init();
}
#endif
/*
* Local variables:
* mode: C

View File

@ -24,11 +24,11 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/sx.h>
@ -47,6 +47,10 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/if_ether.h>
#if __FreeBSD_version >= 700000
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#endif
#include <vm/vm.h>
#include <vm/pmap.h>
@ -63,23 +67,42 @@ __FBSDID("$FreeBSD$");
#include <machine/intr_machdep.h>
#include <machine/xen/xen-os.h>
#include <machine/xen/xenfunc.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/interface/memory.h>
#include <dev/xen/netfront/mbufq.h>
#include <machine/xen/features.h>
#include <xen/interface/io/netif.h>
#include <xen/xenbus/xenbusvar.h>
#include <dev/xen/netfront/mbufq.h>
#include "xenbus_if.h"
#define XN_CSUM_FEATURES (CSUM_TCP | CSUM_UDP | CSUM_TSO)
#define GRANT_INVALID_REF 0
#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
#if __FreeBSD_version >= 700000
/*
* Should the driver do LRO on the RX end
* this can be toggled on the fly, but the
* interface must be reset (down/up) for it
* to take effect.
*/
static int xn_enable_lro = 1;
TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro);
#else
#define IFCAP_TSO4 0
#define CSUM_TSO 0
#endif
#ifdef CONFIG_XEN
static int MODPARM_rx_copy = 0;
module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
@ -92,6 +115,7 @@ static const int MODPARM_rx_copy = 1;
static const int MODPARM_rx_flip = 0;
#endif
#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2)
#define RX_COPY_THRESHOLD 256
#define net_ratelimit() 0
@ -192,6 +216,9 @@ struct net_device_stats
struct netfront_info {
struct ifnet *xn_ifp;
#if __FreeBSD_version >= 700000
struct lro_ctrl xn_lro;
#endif
struct net_device_stats stats;
u_int tx_full;
@ -329,31 +356,12 @@ xennet_get_rx_ref(struct netfront_info *np, RING_IDX ri)
printf("[XEN] " fmt, ##args)
#define WPRINTK(fmt, args...) \
printf("[XEN] " fmt, ##args)
#if 0
#define DPRINTK(fmt, args...) \
printf("[XEN] %s: " fmt, __func__, ##args)
static __inline struct mbuf*
makembuf (struct mbuf *buf)
{
struct mbuf *m = NULL;
MGETHDR (m, M_DONTWAIT, MT_DATA);
if (! m)
return 0;
M_MOVE_PKTHDR(m, buf);
m_cljget(m, M_DONTWAIT, MJUMPAGESIZE);
m->m_pkthdr.len = buf->m_pkthdr.len;
m->m_len = buf->m_len;
m_copydata(buf, 0, buf->m_pkthdr.len, mtod(m,caddr_t) );
m->m_ext.ext_arg1 = (caddr_t *)(uintptr_t)(vtophys(mtod(m,caddr_t)) >> PAGE_SHIFT);
return m;
}
#else
#define DPRINTK(fmt, args...)
#endif
/**
* Read the 'mac' node at the given device's node in the store, and parse that
@ -414,6 +422,13 @@ netfront_attach(device_t dev)
return err;
}
#if __FreeBSD_version >= 700000
SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
OID_AUTO, "enable_lro", CTLTYPE_INT|CTLFLAG_RW,
&xn_enable_lro, 0, "Large Receive Offload");
#endif
return 0;
}
@ -489,17 +504,12 @@ talk_to_backend(device_t dev, struct netfront_info *info)
message = "writing feature-rx-notify";
goto abort_transaction;
}
err = xenbus_printf(xbt, node, "feature-no-csum-offload", "%d", 1);
if (err) {
message = "writing feature-no-csum-offload";
goto abort_transaction;
}
err = xenbus_printf(xbt, node, "feature-sg", "%d", 1);
if (err) {
message = "writing feature-sg";
goto abort_transaction;
}
#ifdef HAVE_TSO
#if __FreeBSD_version >= 700000
err = xenbus_printf(xbt, node, "feature-gso-tcpv4", "%d", 1);
if (err) {
message = "writing feature-gso-tcpv4";
@ -569,7 +579,7 @@ setup_device(device_t dev, struct netfront_info *info)
goto fail;
error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev),
"xn", xn_intr, info, INTR_TYPE_NET | INTR_MPSAFE, &info->irq);
"xn", xn_intr, info, INTR_TYPE_NET | INTR_MPSAFE, &info->irq);
if (error) {
xenbus_dev_fatal(dev, error,
@ -586,6 +596,24 @@ setup_device(device_t dev, struct netfront_info *info)
return (error);
}
/**
* If this interface has an ipv4 address, send an arp for it. This
* helps to get the network going again after migrating hosts.
*/
static void
netfront_send_fake_arp(device_t dev, struct netfront_info *info)
{
	struct ifnet *ifp = info->xn_ifp;
	struct ifaddr *ifa;

	/* Announce every configured IPv4 address on the wire. */
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		if (ifa->ifa_addr->sa_family == AF_INET)
			arp_ifinit(ifp, ifa);
	}
}
/**
* Callback received when the backend's state changes.
*/
@ -611,9 +639,7 @@ netfront_backend_changed(device_t dev, XenbusState newstate)
if (network_connect(sc) != 0)
break;
xenbus_set_state(dev, XenbusStateConnected);
#ifdef notyet
(void)send_fake_arp(netdev);
#endif
netfront_send_fake_arp(dev, sc);
break;
case XenbusStateClosing:
xenbus_set_state(dev, XenbusStateClosed);
@ -851,6 +877,10 @@ static void
xn_rxeof(struct netfront_info *np)
{
struct ifnet *ifp;
#if __FreeBSD_version >= 700000
struct lro_ctrl *lro = &np->xn_lro;
struct lro_entry *queued;
#endif
struct netfront_rx_info rinfo;
struct netif_rx_response *rx = &rinfo.rx;
struct netif_extra_info *extras = rinfo.extras;
@ -945,13 +975,35 @@ xn_rxeof(struct netfront_info *np)
* Do we really need to drop the rx lock?
*/
XN_RX_UNLOCK(np);
/* Pass it up. */
#if __FreeBSD_version >= 700000
/* Use LRO if possible */
if ((ifp->if_capenable & IFCAP_LRO) == 0 ||
lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) {
/*
* If LRO fails, pass up to the stack
* directly.
*/
(*ifp->if_input)(ifp, m);
}
#else
(*ifp->if_input)(ifp, m);
#endif
XN_RX_LOCK(np);
}
np->rx.rsp_cons = i;
#if __FreeBSD_version >= 700000
/*
* Flush any outstanding LRO work
*/
while (!SLIST_EMPTY(&lro->lro_active)) {
queued = SLIST_FIRST(&lro->lro_active);
SLIST_REMOVE_HEAD(&lro->lro_active, next);
tcp_lro_flush(lro, queued);
}
#endif
#if 0
/* If we get a callback with very few responses, reduce fill target. */
/* NB. Note exponential increase, linear decrease. */
@ -972,6 +1024,7 @@ xn_txeof(struct netfront_info *np)
RING_IDX i, prod;
unsigned short id;
struct ifnet *ifp;
netif_tx_response_t *txr;
struct mbuf *m;
XN_TX_LOCK_ASSERT(np);
@ -987,10 +1040,19 @@ xn_txeof(struct netfront_info *np)
rmb(); /* Ensure we see responses up to 'rp'. */
for (i = np->tx.rsp_cons; i != prod; i++) {
id = RING_GET_RESPONSE(&np->tx, i)->id;
txr = RING_GET_RESPONSE(&np->tx, i);
if (txr->status == NETIF_RSP_NULL)
continue;
id = txr->id;
m = np->xn_cdata.xn_tx_chain[id];
ifp->if_opackets++;
/*
* Increment packet count if this is the last
* mbuf of the chain.
*/
if (!m->m_next)
ifp->if_opackets++;
KASSERT(m != NULL, ("mbuf not found in xn_tx_chain"));
M_ASSERTVALID(m);
if (unlikely(gnttab_query_foreign_access(
@ -1008,7 +1070,7 @@ xn_txeof(struct netfront_info *np)
np->xn_cdata.xn_tx_chain[id] = NULL;
add_id_to_freelist(np->xn_cdata.xn_tx_chain, id);
m_freem(m);
m_free(m);
}
np->tx.rsp_cons = prod;
@ -1235,12 +1297,11 @@ xennet_get_responses(struct netfront_info *np,
gnttab_release_grant_reference(&np->gref_rx_head, ref);
next:
if (m == NULL)
break;
m->m_len = rx->status;
m->m_data += rx->offset;
m0->m_pkthdr.len += rx->status;
if (m != NULL) {
m->m_len = rx->status;
m->m_data += rx->offset;
m0->m_pkthdr.len += rx->status;
}
if (!(rx->flags & NETRXF_more_data))
break;
@ -1304,13 +1365,14 @@ xn_start_locked(struct ifnet *ifp)
{
int otherend_id;
unsigned short id;
struct mbuf *m_head, *new_m;
struct mbuf *m_head, *m;
struct netfront_info *sc;
netif_tx_request_t *tx;
netif_extra_info_t *extra;
RING_IDX i;
grant_ref_t ref;
u_long mfn, tx_bytes;
int notify;
int notify, nfrags;
sc = ifp->if_softc;
otherend_id = xenbus_get_otherend_id(sc->xbdev);
@ -1330,36 +1392,96 @@ xn_start_locked(struct ifnet *ifp)
break;
}
id = get_id_from_freelist(sc->xn_cdata.xn_tx_chain);
/*
* Defragment the mbuf if necessary.
*/
for (m = m_head, nfrags = 0; m; m = m->m_next)
nfrags++;
if (nfrags > MAX_SKB_FRAGS) {
m = m_defrag(m_head, M_DONTWAIT);
if (!m) {
m_freem(m_head);
break;
}
m_head = m;
}
/*
* Start packing the mbufs in this chain into
* the fragment pointers. Stop when we run out
* of fragments or hit the end of the mbuf chain.
*/
new_m = makembuf(m_head);
tx = RING_GET_REQUEST(&sc->tx, i);
tx->id = id;
ref = gnttab_claim_grant_reference(&sc->gref_tx_head);
KASSERT((short)ref >= 0, ("Negative ref"));
mfn = virt_to_mfn(mtod(new_m, vm_offset_t));
gnttab_grant_foreign_access_ref(ref, otherend_id,
mfn, GNTMAP_readonly);
tx->gref = sc->grant_tx_ref[id] = ref;
tx->size = new_m->m_pkthdr.len;
#if 0
tx->flags = (skb->ip_summed == CHECKSUM_HW) ? NETTXF_csum_blank : 0;
m = m_head;
extra = NULL;
for (m = m_head; m; m = m->m_next) {
tx = RING_GET_REQUEST(&sc->tx, i);
id = get_id_from_freelist(sc->xn_cdata.xn_tx_chain);
sc->xn_cdata.xn_tx_chain[id] = m;
tx->id = id;
ref = gnttab_claim_grant_reference(&sc->gref_tx_head);
KASSERT((short)ref >= 0, ("Negative ref"));
mfn = virt_to_mfn(mtod(m, vm_offset_t));
gnttab_grant_foreign_access_ref(ref, otherend_id,
mfn, GNTMAP_readonly);
tx->gref = sc->grant_tx_ref[id] = ref;
tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1);
tx->flags = 0;
if (m == m_head) {
/*
* The first fragment has the entire packet
* size, subsequent fragments have just the
* fragment size. The backend works out the
* true size of the first fragment by
* subtracting the sizes of the other
* fragments.
*/
tx->size = m->m_pkthdr.len;
/*
* The first fragment contains the
* checksum flags and is optionally
* followed by extra data for TSO etc.
*/
if (m->m_pkthdr.csum_flags
& CSUM_DELAY_DATA) {
tx->flags |= (NETTXF_csum_blank
| NETTXF_data_validated);
}
#if __FreeBSD_version >= 700000
if (m->m_pkthdr.csum_flags & CSUM_TSO) {
struct netif_extra_info *gso =
(struct netif_extra_info *)
RING_GET_REQUEST(&sc->tx, ++i);
if (extra)
extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
else
tx->flags |= NETTXF_extra_info;
gso->u.gso.size = m->m_pkthdr.tso_segsz;
gso->u.gso.type =
XEN_NETIF_GSO_TYPE_TCPV4;
gso->u.gso.pad = 0;
gso->u.gso.features = 0;
gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
gso->flags = 0;
extra = gso;
}
#endif
tx->flags = 0;
new_m->m_next = NULL;
new_m->m_nextpkt = NULL;
} else {
tx->size = m->m_len;
}
if (m->m_next) {
tx->flags |= NETTXF_more_data;
i++;
}
}
m_freem(m_head);
BPF_MTAP(ifp, m_head);
sc->xn_cdata.xn_tx_chain[id] = new_m;
BPF_MTAP(ifp, new_m);
sc->stats.tx_bytes += new_m->m_pkthdr.len;
sc->stats.tx_bytes += m_head->m_pkthdr.len;
sc->stats.tx_packets++;
}
@ -1445,9 +1567,9 @@ xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
xn_ifinit_locked(sc);
arp_ifinit(ifp, ifa);
XN_UNLOCK(sc);
XN_UNLOCK(sc);
} else {
XN_UNLOCK(sc);
XN_UNLOCK(sc);
error = ether_ioctl(ifp, cmd, data);
}
break;
@ -1501,12 +1623,39 @@ xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
case SIOCSIFCAP:
mask = ifr->ifr_reqcap ^ ifp->if_capenable;
if (mask & IFCAP_HWCSUM) {
if (IFCAP_HWCSUM & ifp->if_capenable)
ifp->if_capenable &= ~IFCAP_HWCSUM;
else
ifp->if_capenable |= IFCAP_HWCSUM;
if (mask & IFCAP_TXCSUM) {
if (IFCAP_TXCSUM & ifp->if_capenable) {
ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
| CSUM_IP | CSUM_TSO);
} else {
ifp->if_capenable |= IFCAP_TXCSUM;
ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP
| CSUM_IP);
}
}
if (mask & IFCAP_RXCSUM) {
ifp->if_capenable ^= IFCAP_RXCSUM;
}
#if __FreeBSD_version >= 700000
if (mask & IFCAP_TSO4) {
if (IFCAP_TSO4 & ifp->if_capenable) {
ifp->if_capenable &= ~IFCAP_TSO4;
ifp->if_hwassist &= ~CSUM_TSO;
} else if (IFCAP_TXCSUM & ifp->if_capenable) {
ifp->if_capenable |= IFCAP_TSO4;
ifp->if_hwassist |= CSUM_TSO;
} else {
DPRINTK("Xen requires tx checksum offload"
" be enabled to use TSO\n");
error = EINVAL;
}
}
if (mask & IFCAP_LRO) {
ifp->if_capenable ^= IFCAP_LRO;
}
#endif
error = 0;
break;
case SIOCADDMULTI:
@ -1715,11 +1864,21 @@ create_netdev(device_t dev)
ifp->if_mtu = ETHERMTU;
ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;
#ifdef notyet
ifp->if_hwassist = XN_CSUM_FEATURES;
ifp->if_capabilities = IFCAP_HWCSUM;
#if __FreeBSD_version >= 700000
ifp->if_capabilities |= IFCAP_TSO4;
if (xn_enable_lro) {
int err = tcp_lro_init(&np->xn_lro);
if (err) {
device_printf(dev, "LRO initialization failed\n");
goto exit;
}
np->xn_lro.ifp = ifp;
ifp->if_capabilities |= IFCAP_LRO;
}
#endif
ifp->if_capenable = ifp->if_capabilities;
#endif
ether_ifattach(ifp, np->mac);
callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE);

418
sys/dev/xen/xenpci/evtchn.c Normal file
View File

@ -0,0 +1,418 @@
/******************************************************************************
* evtchn.c
*
* A simplified event channel for para-drivers in unmodified linux
*
* Copyright (c) 2002-2005, K A Fraser
* Copyright (c) 2005, Intel Corporation <xiaofeng.ling@intel.com>
*
* This file may be distributed separately from the Linux kernel, or
* incorporated into other software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/interrupt.h>
#include <sys/pcpu.h>
#include <machine/xen/xen-os.h>
#include <machine/xen/xenvar.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
#include <sys/smp.h>
#include <dev/xen/xenpci/xenpcivar.h>
/*
 * Find-first-set: return the bit index of the least significant 1 bit
 * of 'word' using the amd64 BSF instruction.  The result is undefined
 * when word == 0 (BSF does not write its destination in that case), so
 * callers must only pass nonzero values, as evtchn_interrupt() does.
 */
static inline unsigned long __ffs(unsigned long word)
{
	__asm__("bsfq %1,%0"
	:"=r" (word)
	:"rm" (word));
	return word;
}
/* Port 0 is never used for a bound channel, so 0 doubles as "unbound". */
#define is_valid_evtchn(x)	((x) != 0)
/*
 * Map an irq number to the event channel port bound to it (0 if none).
 * Fixed to evaluate its argument; the previous definition ignored 'x'
 * and hard-coded the variable name 'irq', which only worked by accident
 * because every caller happened to use that name.
 */
#define evtchn_from_irq(x)	(irq_evtchn[(x)].evtchn)

/*
 * Per-irq state.  Each entry carries its own lock; 'in_handler' is set
 * for the duration of a handler invocation so unbind_from_irqhandler()
 * can wait for in-flight calls to drain.
 */
static struct {
	struct mtx lock;
	driver_intr_t *handler;
	void *arg;
	int evtchn;		/* bound event channel port, 0 when unbound */
	int close:1;		/* close on unbind_from_irqhandler()? */
	int inuse:1;		/* slot allocated by alloc_xen_irq()? */
	int in_handler:1;	/* handler currently executing? */
	int mpsafe:1;		/* may run without Giant? */
} irq_evtchn[256];

/* Reverse map: event channel port -> irq number, -1 when unbound. */
static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
	[0 ...  NR_EVENT_CHANNELS-1] = -1 };

/* Protects the 'inuse' flags in irq_evtchn[]. */
static struct mtx irq_alloc_lock;
/* The xenpci device instance, needed for BUS_*_INTR on suspend/resume. */
static device_t xenpci_device;

#define ARRAY_SIZE(a)	(sizeof(a) / sizeof(a[0]))
/*
 * Allocate a free slot in irq_evtchn[] (slot 0 is reserved as invalid).
 * Returns the irq number on success or -ENOSPC when the table is full.
 *
 * The return type must be signed (it was previously 'unsigned int'):
 * callers detect failure with 'irq < 0', which is never true for an
 * unsigned value, so the -ENOSPC error was silently wrapping into a
 * huge array index.
 */
static int
alloc_xen_irq(void)
{
	static int warned;
	int irq;

	mtx_lock(&irq_alloc_lock);

	for (irq = 1; irq < ARRAY_SIZE(irq_evtchn); irq++) {
		if (irq_evtchn[irq].inuse)
			continue;
		irq_evtchn[irq].inuse = 1;
		mtx_unlock(&irq_alloc_lock);
		return irq;
	}

	if (!warned) {
		warned = 1;
		printf("alloc_xen_irq: No available IRQ to bind to: "
		       "increase irq_evtchn[] size in evtchn.c.\n");
	}

	mtx_unlock(&irq_alloc_lock);

	return -ENOSPC;
}
/*
 * Return an irq_evtchn[] slot to the free pool.
 */
static void
free_xen_irq(int slot)
{

	mtx_lock(&irq_alloc_lock);
	irq_evtchn[slot].inuse = 0;
	mtx_unlock(&irq_alloc_lock);
}
/*
 * Look up the event channel port currently bound to 'irq' (0 if none).
 */
int
irq_to_evtchn_port(int irq)
{

	return (irq_evtchn[irq].evtchn);
}
/*
 * Set the mask bit for 'port' in the shared info page so the hypervisor
 * stops delivering events for it.
 */
void
mask_evtchn(int port)
{
	shared_info_t *shared = HYPERVISOR_shared_info;

	synch_set_bit(port, &shared->evtchn_mask[0]);
}
void
unmask_evtchn(int port)
{
evtchn_unmask_t op = { .port = port };
HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &op);
}
/*
 * Allocate a fresh unbound event channel with 'remote_domain' as the far
 * end, hook 'handler'/'arg' up to it and unmask it.  The channel is
 * closed again on unbind_from_irqhandler().  On success the chosen irq
 * is stored in *irqp (when non-NULL) and 0 is returned; a negative errno
 * is returned on failure.
 *
 * Bug fix: 'irq' was declared 'unsigned int', so the 'irq < 0' check on
 * the result of alloc_xen_irq() could never fire and a failed allocation
 * indexed irq_evtchn[] far out of bounds.
 */
int
bind_listening_port_to_irqhandler(unsigned int remote_domain,
	const char *devname, driver_intr_t handler, void *arg,
	unsigned long irqflags, unsigned int *irqp)
{
	struct evtchn_alloc_unbound alloc_unbound;
	int irq;	/* signed so the error check below works */
	int error;

	irq = alloc_xen_irq();
	if (irq < 0)
		return irq;

	mtx_lock(&irq_evtchn[irq].lock);

	alloc_unbound.dom        = DOMID_SELF;
	alloc_unbound.remote_dom = remote_domain;
	error = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
					  &alloc_unbound);
	if (error) {
		mtx_unlock(&irq_evtchn[irq].lock);
		free_xen_irq(irq);
		return (-error);
	}

	irq_evtchn[irq].handler = handler;
	irq_evtchn[irq].arg     = arg;
	irq_evtchn[irq].evtchn  = alloc_unbound.port;
	irq_evtchn[irq].close   = 1;	/* we own the port; close on unbind */
	irq_evtchn[irq].mpsafe  = (irqflags & INTR_MPSAFE) != 0;
	evtchn_to_irq[alloc_unbound.port] = irq;

	unmask_evtchn(alloc_unbound.port);

	mtx_unlock(&irq_evtchn[irq].lock);

	if (irqp)
		*irqp = irq;

	return (0);
}
/*
 * Bind 'handler'/'arg' to an already-existing event channel port (e.g.
 * the console port handed to us in start_info) and unmask it.  The port
 * is NOT closed on unbind since we do not own it.  On success the chosen
 * irq is stored in *irqp (when non-NULL) and 0 is returned; a negative
 * errno is returned on failure.
 *
 * Bug fix: 'irq' was declared 'unsigned int', so the 'irq < 0' check on
 * the result of alloc_xen_irq() could never fire and a failed allocation
 * indexed irq_evtchn[] far out of bounds.
 */
int
bind_caller_port_to_irqhandler(unsigned int caller_port,
	const char *devname, driver_intr_t handler, void *arg,
	unsigned long irqflags, unsigned int *irqp)
{
	int irq;	/* signed so the error check below works */

	irq = alloc_xen_irq();
	if (irq < 0)
		return irq;

	mtx_lock(&irq_evtchn[irq].lock);

	irq_evtchn[irq].handler = handler;
	irq_evtchn[irq].arg     = arg;
	irq_evtchn[irq].evtchn  = caller_port;
	irq_evtchn[irq].close   = 0;	/* caller owns the port */
	irq_evtchn[irq].mpsafe  = (irqflags & INTR_MPSAFE) != 0;
	evtchn_to_irq[caller_port] = irq;

	unmask_evtchn(caller_port);

	mtx_unlock(&irq_evtchn[irq].lock);

	if (irqp)
		*irqp = irq;

	return (0);
}
/*
 * Detach the handler bound to 'irq': mask the underlying event channel,
 * close it if we allocated it (close flag set by the bind routines),
 * wait for any in-flight handler invocation to finish, then return the
 * irq slot to the free pool.
 */
void
unbind_from_irqhandler(unsigned int irq)
{
	int evtchn;

	mtx_lock(&irq_evtchn[irq].lock);

	evtchn = evtchn_from_irq(irq);
	if (is_valid_evtchn(evtchn)) {
		evtchn_to_irq[evtchn] = -1;
		mask_evtchn(evtchn);
		if (irq_evtchn[irq].close) {
			struct evtchn_close close = { .port = evtchn };
			if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
				panic("EVTCHNOP_close failed");
		}
	}

	irq_evtchn[irq].handler = NULL;
	irq_evtchn[irq].evtchn = 0;

	mtx_unlock(&irq_evtchn[irq].lock);

	/*
	 * evtchn_interrupt() drops the irq lock before calling the handler,
	 * so spin on in_handler (set under that lock) until any in-flight
	 * invocation has drained before recycling the slot.
	 */
	while (irq_evtchn[irq].in_handler)
		cpu_relax();

	free_xen_irq(irq);
}
/*
 * Kick the remote end of the event channel bound to 'irq', if any;
 * silently does nothing when the irq has no bound port.
 */
void notify_remote_via_irq(int irq)
{
	int evtchn;

	evtchn = evtchn_from_irq(irq);
	if (is_valid_evtchn(evtchn))
		notify_remote_via_evtchn(evtchn);
}
/*
 * Return the word of event channel ports in group 'idx' that are both
 * pending and not masked.
 */
static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh,
						unsigned int idx)
{
	unsigned long pending, masked;

	pending = sh->evtchn_pending[idx];
	masked = sh->evtchn_mask[idx];
	return (pending & ~masked);
}
/*
 * Upcall delivered through the emulated PCI interrupt: scan the two-level
 * pending bitmap in the shared info page and invoke the handler bound to
 * each active event channel port.
 *
 * The scan resumes one bit past where the previous invocation stopped
 * (pc_last_processed_l1i/l2i) so low-numbered ports cannot starve
 * high-numbered ones.
 */
static void
evtchn_interrupt(void *arg)
{
	unsigned int l1i, l2i, port;
	unsigned long masked_l1, masked_l2;
	/* XXX: All events are bound to vcpu0 but irq may be redirected. */
	int cpu = 0; /*smp_processor_id();*/
	driver_intr_t *handler;
	void *handler_arg;
	int irq, handler_mpsafe;
	shared_info_t *s = HYPERVISOR_shared_info;
	vcpu_info_t *v = &s->vcpu_info[cpu];
	struct pcpu *pc = pcpu_find(cpu);
	unsigned long l1, l2;

	/* Clear the per-vcpu upcall flag before sampling the selector. */
	v->evtchn_upcall_pending = 0;

#if 0
#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
	/* Clear master flag /before/ clearing selector flag. */
	wmb();
#endif
#endif

	/* Atomically snapshot and clear the level-1 selector word. */
	l1 = atomic_readandclear_long(&v->evtchn_pending_sel);

	l1i = pc->pc_last_processed_l1i;
	l2i = pc->pc_last_processed_l2i;

	while (l1 != 0) {
		/* Advance past the last level-1 bit handled previously. */
		l1i = (l1i + 1) % LONG_BIT;
		masked_l1 = l1 & ((~0UL) << l1i);

		if (masked_l1 == 0) { /* if we masked out all events, wrap around to the beginning */
			l1i = LONG_BIT - 1;
			l2i = LONG_BIT - 1;
			continue;
		}
		l1i = __ffs(masked_l1);

		do {
			l2 = active_evtchns(cpu, s, l1i);

			/* Advance past the last level-2 bit handled. */
			l2i = (l2i + 1) % LONG_BIT;
			masked_l2 = l2 & ((~0UL) << l2i);

			if (masked_l2 == 0) { /* if we masked out all events, move on */
				l2i = LONG_BIT - 1;
				break;
			}
			l2i = __ffs(masked_l2);

			/* process port */
			port = (l1i * LONG_BIT) + l2i;
			synch_clear_bit(port, &s->evtchn_pending[0]);

			irq = evtchn_to_irq[port];
			if (irq < 0)
				continue;

			/* Snapshot handler state under the per-irq lock. */
			mtx_lock(&irq_evtchn[irq].lock);
			handler = irq_evtchn[irq].handler;
			handler_arg = irq_evtchn[irq].arg;
			handler_mpsafe = irq_evtchn[irq].mpsafe;
			if (unlikely(handler == NULL)) {
				printf("Xen IRQ%d (port %d) has no handler!\n",
				    irq, port);
				mtx_unlock(&irq_evtchn[irq].lock);
				continue;
			}
			/*
			 * Mark the handler in-flight so that
			 * unbind_from_irqhandler() can wait for it, then
			 * drop the lock while it runs.
			 */
			irq_evtchn[irq].in_handler = 1;
			mtx_unlock(&irq_evtchn[irq].lock);

			//local_irq_enable();
			/* Non-MPSAFE handlers run under Giant. */
			if (!handler_mpsafe)
				mtx_lock(&Giant);
			handler(handler_arg);
			if (!handler_mpsafe)
				mtx_unlock(&Giant);
			//local_irq_disable();

			mtx_lock(&irq_evtchn[irq].lock);
			irq_evtchn[irq].in_handler = 0;
			mtx_unlock(&irq_evtchn[irq].lock);

			/* if this is the final port processed, we'll pick up here+1 next time */
			pc->pc_last_processed_l1i = l1i;
			pc->pc_last_processed_l2i = l2i;

		} while (l2i != LONG_BIT - 1);

		l2 = active_evtchns(cpu, s, l1i);
		if (l2 == 0) /* we handled all ports, so we can clear the selector bit */
			l1 &= ~(1UL << l1i);
	}
}
/*
 * Take our upcall handler off the platform device's irq so that no
 * events are delivered while the domain suspends; irq_resume() installs
 * it again afterwards.
 */
void
irq_suspend(void)
{
	struct xenpci_softc *scp = device_get_softc(xenpci_device);

	if (scp->intr_cookie == NULL)
		return;

	if (BUS_TEARDOWN_INTR(device_get_parent(xenpci_device),
	    xenpci_device, scp->res_irq, scp->intr_cookie) != 0)
		printf("intr teardown failed.. continuing\n");
	scp->intr_cookie = NULL;
}
void
irq_resume(void)
{
struct xenpci_softc *scp = device_get_softc(xenpci_device);
int evtchn, irq;
for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) {
mask_evtchn(evtchn);
evtchn_to_irq[evtchn] = -1;
}
for (irq = 0; irq < ARRAY_SIZE(irq_evtchn); irq++)
irq_evtchn[irq].evtchn = 0;
BUS_SETUP_INTR(device_get_parent(xenpci_device),
xenpci_device, scp->res_irq, INTR_TYPE_MISC,
NULL, evtchn_interrupt, NULL, &scp->intr_cookie);
}
/*
 * One-time event channel setup: initialise the locks, reset each cpu's
 * bitmap scan position, and hook evtchn_interrupt() to the platform
 * device's irq.  Returns 0 or a bus error code.
 */
int
xenpci_irq_init(device_t device, struct xenpci_softc *scp)
{
	int error, i;

	mtx_init(&irq_alloc_lock, "xen-irq-lock", NULL, MTX_DEF);

	for (i = 0; i < ARRAY_SIZE(irq_evtchn); i++)
		mtx_init(&irq_evtchn[i].lock, "irq-evtchn", NULL, MTX_DEF);

	/* Start each cpu's event scan just before bit 0. */
	for (i = 0; i < mp_ncpus; i++) {
		struct pcpu *pc = pcpu_find(i);

		pc->pc_last_processed_l1i = LONG_BIT - 1;
		pc->pc_last_processed_l2i = LONG_BIT - 1;
	}

	error = BUS_SETUP_INTR(device_get_parent(device), device,
	    scp->res_irq, INTR_MPSAFE|INTR_TYPE_MISC, NULL, evtchn_interrupt,
	    NULL, &scp->intr_cookie);
	if (error)
		return (error);

	/* Remember the device for irq_suspend()/irq_resume(). */
	xenpci_device = device;

	return (0);
}

View File

@ -0,0 +1,80 @@
/*-
* Copyright (c) 2008 Citrix Systems, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/interrupt.h>
#include <machine/atomic.h>
#include <machine/xen/xen-os.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <dev/xen/xenpci/xenpcivar.h>
/*
 * Suspend the domain: quiesce all drivers, tear down the event channel
 * upcall, then issue HYPERVISOR_suspend().  If the hypervisor actually
 * suspended us (rather than cancelling), rebuild the Xen machinery via
 * xenpci_resume() before letting drivers resume.
 */
void
xen_suspend()
{
	int suspend_cancelled;

	if (DEVICE_SUSPEND(root_bus)) {
		printf("xen_suspend: device_suspend failed\n");
		return;
	}

	/*
	 * Make sure we don't change cpus or switch to some other
	 * thread. for the duration.
	 */
	critical_enter();

	/*
	 * Prevent any races with evtchn_interrupt() handler.
	 */
	irq_suspend();
	disable_intr();

	/* Returns non-zero when the suspend was cancelled by the tools. */
	suspend_cancelled = HYPERVISOR_suspend(0);
	if (!suspend_cancelled)
		xenpci_resume();

	/*
	 * Re-enable interrupts and put the scheduler back to normal.
	 */
	enable_intr();
	critical_exit();

	/*
	 * FreeBSD really needs to add DEVICE_SUSPEND_CANCEL or
	 * similar.
	 */
	if (!suspend_cancelled)
		DEVICE_RESUME(root_bus);
}

399
sys/dev/xen/xenpci/xenpci.c Normal file
View File

@ -0,0 +1,399 @@
/*
* Copyright (c) 2008 Citrix Systems, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/rman.h>
#include <machine/stdarg.h>
#include <machine/xen/xen-os.h>
#include <xen/features.h>
#include <xen/hypervisor.h>
#include <xen/gnttab.h>
#include <xen/xen_intr.h>
#include <xen/interface/memory.h>
#include <xen/interface/hvm/params.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>
#include <dev/xen/xenpci/xenpcivar.h>
/*
* These variables are used by the rest of the kernel to access the
* hypervisor.
*/
char *hypercall_stubs;
shared_info_t *HYPERVISOR_shared_info;
static vm_paddr_t shared_info_pa;
/*
* This is used to find our platform device instance.
*/
static devclass_t xenpci_devclass;
/*
* Return the CPUID base address for Xen functions.
*/
/*
 * Return the CPUID base address for Xen functions.
 *
 * Xen advertises itself somewhere in the 0x40000000-0x40000f00 range;
 * probe each 0x100-aligned slot for the "XenVMMXenVMM" signature in
 * ebx/ecx/edx with at least two leaves beyond the base.  Returns 0 when
 * no signature is found.
 */
static uint32_t
xenpci_cpuid_base(void)
{
	uint32_t leaf, regs[4];

	for (leaf = 0x40000000; leaf < 0x40001000; leaf += 0x100) {
		do_cpuid(leaf, regs);
		if (memcmp("XenVMMXenVMM", &regs[1], 12) == 0 &&
		    regs[0] - leaf >= 2)
			return (leaf);
	}
	return (0);
}
/*
* Allocate and fill in the hypcall page.
*/
/*
 * Allocate and fill in the hypercall transfer pages.
 *
 * Locate Xen via cpuid; leaf base+2 reports the page count in eax and
 * the MSR used to install the pages in ebx.  Writing the physical
 * address of a page (plus its index in the low bits) to that MSR makes
 * the hypervisor fill it in.  Returns 0 or EINVAL when not running
 * under a Xen VMM.
 */
static int
xenpci_init_hypercall_stubs(device_t dev, struct xenpci_softc * scp)
{
	uint32_t base, regs[4];
	int pageno;

	base = xenpci_cpuid_base();
	if (!base) {
		device_printf(dev, "Xen platform device but not Xen VMM\n");
		return (EINVAL);
	}

	if (bootverbose) {
		do_cpuid(base + 1, regs);
		device_printf(dev, "Xen version %d.%d.\n",
		    regs[0] >> 16, regs[0] & 0xffff);
	}

	/*
	 * Find the hypercall pages.
	 */
	do_cpuid(base + 2, regs);

	hypercall_stubs = malloc(regs[0] * PAGE_SIZE, M_TEMP, M_WAITOK);
	for (pageno = 0; pageno < regs[0]; pageno++)
		wrmsr(regs[1],
		    vtophys(hypercall_stubs + pageno * PAGE_SIZE) + pageno);

	return (0);
}
/*
* After a resume, re-initialise the hypercall page.
*/
/*
 * After a resume the hypercall pages must be re-installed: ask the
 * hypervisor (via the MSR advertised at cpuid leaf base+2) to rewrite
 * each page of the buffer already allocated at attach time.
 */
static void
xenpci_resume_hypercall_stubs(device_t dev, struct xenpci_softc * scp)
{
	uint32_t base, regs[4];
	int pageno;

	base = xenpci_cpuid_base();

	do_cpuid(base + 2, regs);
	for (pageno = 0; pageno < regs[0]; pageno++)
		wrmsr(regs[1],
		    vtophys(hypercall_stubs + pageno * PAGE_SIZE) + pageno);
}
/*
* Tell the hypervisor how to contact us for event channel callbacks.
*/
/*
 * Tell the hypervisor how to contact us for event channel callbacks
 * by setting the HVM_PARAM_CALLBACK_IRQ parameter.
 */
static void
xenpci_set_callback(device_t dev)
{
	int irq;
	uint64_t callback;
	struct xen_hvm_param xhp;

	irq = pci_get_irq(dev);
	if (irq < 16) {
		/* Legacy ISA irq: pass the GSI number directly. */
		callback = irq;
	} else {
		/*
		 * Otherwise encode the device's INTx pin (bits 0-1) and
		 * PCI slot (bits 11+), with bit 56 selecting PCI-INTx
		 * delivery.  NOTE(review): encoding assumed to follow the
		 * Xen HVM_PARAM_CALLBACK_IRQ convention — confirm against
		 * xen/interface/hvm/params.h.
		 */
		callback = (pci_get_intpin(dev) - 1) & 3;
		callback |= pci_get_slot(dev) << 11;
		callback |= 1ull << 56;
	}

	xhp.domid = DOMID_SELF;
	xhp.index = HVM_PARAM_CALLBACK_IRQ;
	xhp.value = callback;
	if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp))
		panic("Can't set evtchn callback");
}
/*
* Deallocate anything allocated by xenpci_allocate_resources.
*/
/*
 * Release one bus resource if currently held, clearing the stored
 * pointer so a second call is harmless.
 */
static void
xenpci_release_resource(device_t dev, int type, int rid,
    struct resource **resp)
{

	if (*resp != 0) {
		bus_deactivate_resource(dev, type, rid, *resp);
		bus_release_resource(dev, type, rid, *resp);
		*resp = 0;
	}
}

/*
 * Deallocate anything allocated by xenpci_allocate_resources.
 */
static int
xenpci_deallocate_resources(device_t dev)
{
	struct xenpci_softc *scp = device_get_softc(dev);

	xenpci_release_resource(dev, SYS_RES_IRQ,
	    scp->rid_irq, &scp->res_irq);
	xenpci_release_resource(dev, SYS_RES_MEMORY,
	    scp->rid_memory, &scp->res_memory);

	return (0);
}
/*
* Allocate irq and memory resources.
*/
/*
 * Grab the two resources the platform device needs: its shareable
 * interrupt line and the memory region behind BAR 1 (which
 * xenpci_alloc_space() later carves physical pages out of).
 * Returns 0 or ENXIO.
 */
static int
xenpci_allocate_resources(device_t dev)
{
	struct xenpci_softc *scp = device_get_softc(dev);

	scp->res_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ,
	    &scp->rid_irq, RF_SHAREABLE|RF_ACTIVE);
	if (scp->res_irq != NULL) {
		scp->rid_memory = PCIR_BAR(1);
		scp->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
		    &scp->rid_memory, RF_ACTIVE);
		if (scp->res_memory != NULL)
			return (0);
	}

	/* Cleanup anything we may have assigned. */
	xenpci_deallocate_resources(dev);
	return (ENXIO);		/* For want of a better idea. */
}
/*
* Allocate a physical address range from our mmio region.
*/
/*
 * Carve a sz-byte physical address range out of the platform device's
 * mmio region.  This is a simple bump allocator: there is no way to
 * free, and no alignment is imposed beyond what callers request.
 *
 * NOTE(review): rman_get_end() conventionally returns the LAST valid
 * address of the region, so this check arguably rejects an allocation
 * ending exactly on the final byte (off by one) — confirm against
 * rman(9) before changing.
 */
static int
xenpci_alloc_space_int(struct xenpci_softc *scp, size_t sz,
    vm_paddr_t *pa)
{

	if (scp->phys_next + sz > rman_get_end(scp->res_memory)) {
		return (ENOMEM);
	}

	*pa = scp->phys_next;
	scp->phys_next += sz;

	return (0);
}
/*
* Allocate a physical address range from our mmio region.
*/
/*
 * Public wrapper for xenpci_alloc_space_int(): allocate a physical
 * address range from the platform device's mmio region.  Returns ENOMEM
 * when the device has not attached or the region is exhausted.
 */
int
xenpci_alloc_space(size_t sz, vm_paddr_t *pa)
{
	device_t dev;

	dev = devclass_get_device(xenpci_devclass, 0);
	if (dev == NULL)
		return (ENOMEM);

	return (xenpci_alloc_space_int(device_get_softc(dev), sz, pa));
}
/*
* Called very early in the resume sequence - reinitialise the various
* bits of Xen machinery including the hypercall page and the shared
* info page.
*/
void
xenpci_resume()
{
	device_t dev = devclass_get_device(xenpci_devclass, 0);
	struct xenpci_softc *scp = device_get_softc(dev);
	struct xen_add_to_physmap xatp;

	/* Re-install the hypercall transfer pages (see attach). */
	xenpci_resume_hypercall_stubs(dev, scp);

	/* Re-plumb the shared info page at the same physical address. */
	xatp.domid = DOMID_SELF;
	xatp.idx = 0;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.gpfn = shared_info_pa >> PAGE_SHIFT;
	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
		panic("HYPERVISOR_memory_op failed");

	pmap_kenter((vm_offset_t) HYPERVISOR_shared_info, shared_info_pa);

	/* Re-register the upcall, then restore grants and irq state. */
	xenpci_set_callback(dev);

	gnttab_resume();
	irq_resume();
}
/*
 * Probe - just check device ID.
 */
static int
xenpci_probe(device_t dev)
{
	uint32_t devid;

	/* Match only the Xen platform PCI device (vendor 5853, dev 0001). */
	devid = pci_get_devid(dev);
	if (devid == 0x00015853) {
		device_set_desc(dev, "Xen Platform Device");
		return (bus_generic_probe(dev));
	}

	return (ENXIO);
}
/*
 * Attach - find resources and talk to Xen.
 */
static int
xenpci_attach(device_t dev)
{
	int error;
	struct xenpci_softc *scp = device_get_softc(dev);
	struct xen_add_to_physmap xatp;
	vm_offset_t shared_va;

	/* Grab the irq and MMIO BAR first; everything else hangs off them. */
	error = xenpci_allocate_resources(dev);
	if (error)
		goto errexit;

	scp->phys_next = rman_get_start(scp->res_memory);

	error = xenpci_init_hypercall_stubs(dev, scp);
	if (error)
		goto errexit;

	setup_xen_features();

	/*
	 * Carve a page out of the MMIO region to host the shared info
	 * page.  Previously the return value was ignored; check it so a
	 * failure surfaces here instead of as a bogus mapping below.
	 */
	error = xenpci_alloc_space_int(scp, PAGE_SIZE, &shared_info_pa);
	if (error)
		goto errexit;

	/* Ask Xen to place the shared info page at that guest pfn. */
	xatp.domid = DOMID_SELF;
	xatp.idx = 0;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.gpfn = shared_info_pa >> PAGE_SHIFT;
	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
		panic("HYPERVISOR_memory_op failed");

	/* Map it into kernel VA so the rest of the kernel can see it. */
	shared_va = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
	pmap_kenter(shared_va, shared_info_pa);
	HYPERVISOR_shared_info = (void *) shared_va;

	/*
	 * Hook the irq up to evtchn
	 */
	xenpci_irq_init(dev, scp);
	xenpci_set_callback(dev);

	return (bus_generic_attach(dev));

errexit:
	/*
	 * Undo anything we may have done.
	 */
	xenpci_deallocate_resources(dev);
	return (error);
}
/*
 * Detach - reverse anything done by attach.
 */
static int
xenpci_detach(device_t dev)
{
	struct xenpci_softc *scp;
	device_t parent;

	scp = device_get_softc(dev);
	parent = device_get_parent(dev);

	/*
	 * Take our interrupt handler out of the list of handlers
	 * that can handle this irq.
	 */
	if (scp->intr_cookie != NULL) {
		if (BUS_TEARDOWN_INTR(parent, dev, scp->res_irq,
		    scp->intr_cookie) != 0)
			printf("intr teardown failed.. continuing\n");
		scp->intr_cookie = NULL;
	}

	/* Hand back the irq and memory resources we allocated at attach. */
	return (xenpci_deallocate_resources(dev));
}
/* Newbus method dispatch table for the xenpci platform device driver. */
static device_method_t xenpci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, xenpci_probe),
	DEVMETHOD(device_attach, xenpci_attach),
	DEVMETHOD(device_detach, xenpci_detach),
	DEVMETHOD(device_suspend, bus_generic_suspend),
	DEVMETHOD(device_resume, bus_generic_resume),

	/* Bus interface */
	DEVMETHOD(bus_add_child, bus_generic_add_child),

	{ 0, 0 }	/* table terminator */
};
/* Driver declaration: name, method table and per-instance softc size. */
static driver_t xenpci_driver = {
	"xenpci",
	xenpci_methods,
	sizeof(struct xenpci_softc),
};

/* Register the driver on the PCI bus; also declares xenpci_devclass. */
DRIVER_MODULE(xenpci, pci, xenpci_driver, xenpci_devclass, 0, 0);

View File

@ -0,0 +1,44 @@
/*
* Copyright (c) 2008 Citrix Systems, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
 * One of these per allocated device.
 */
struct xenpci_softc {
	int rid_ioport;		/* resource id for I/O ports (not used in xenpci.c as seen — verify) */
	int rid_memory;		/* resource id for the MMIO BAR (PCIR_BAR(1)) */
	int rid_irq;		/* resource id for the interrupt line */
	struct resource* res_memory;	/* Resource for mem range. */
	struct resource* res_irq;	/* Resource for irq range. */
	void *intr_cookie;	/* from BUS_SETUP_INTR; torn down at detach */

	vm_paddr_t phys_next;	/* next page from mem range */
};

/* Hook the platform device's irq up to the event channel upcall path. */
extern int xenpci_irq_init(device_t device, struct xenpci_softc *scp);
/* Carve sz bytes out of the device's MMIO region; 0 or ENOMEM. */
extern int xenpci_alloc_space(size_t sz, vm_paddr_t *pa);
/* Early-resume hook: rebuild hypercall page, shared info, callbacks. */
extern void xenpci_resume(void);
/* Suspend entry point — implementation not in this header's view. */
extern void xen_suspend(void);

View File

@ -222,7 +222,11 @@ set_phys_to_machine(unsigned long pfn, unsigned long mfn)
xen_phys_machine[pfn] = mfn;
}
static __inline int
phys_to_machine_mapping_valid(unsigned long pfn)
{
	/* A pfn is translatable iff its p2m entry is not the sentinel. */
	return xen_phys_machine[pfn] != INVALID_P2M_ENTRY;
}
#endif /* _XEN_XENPMAP_H_ */

View File

@ -13,56 +13,28 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/interrupt.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/cpufunc.h>
#include <machine/intr_machdep.h>
#include <machine/xen/xen-os.h>
#include <machine/xen/xenvar.h>
#include <xen/xen_intr.h>
#include <machine/xen/synch_bitops.h>
#include <xen/evtchn.h>
#include <xen/hypervisor.h>
#include <sys/smp.h>
/* linux helper functions that got sucked in
* rename and move XXX
*/
static inline int find_first_bit(const unsigned long *addr, unsigned size)
{
int d0, d1;
int res;
/* This looks at memory. Mark it volatile to tell gcc not to move it around */
__asm__ __volatile__(
"xorl %%eax,%%eax\n\t"
"repe; scasl\n\t"
"jz 1f\n\t"
"leal -4(%%edi),%%edi\n\t"
"bsfl (%%edi),%%eax\n"
"1:\tsubl %%ebx,%%edi\n\t"
"shll $3,%%edi\n\t"
"addl %%edi,%%eax"
:"=a" (res), "=&c" (d0), "=&D" (d1)
:"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory");
return res;
}
#define min_t(type,x,y) \
({ type __x = (x); type __y = (y); __x < __y ? __x: __y; })
#define first_cpu(src) __first_cpu(&(src), NR_CPUS)
static inline int __first_cpu(const xen_cpumask_t *srcp, int nbits)
{
return min_t(int, nbits, find_first_bit(srcp->bits, nbits));
}
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
static inline unsigned long __ffs(unsigned long word)
{
@ -166,7 +138,7 @@ static int irq_bindcount[NR_IRQS];
#ifdef SMP
static uint8_t cpu_evtchn[NR_EVENT_CHANNELS];
static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG];
static unsigned long cpu_evtchn_mask[MAX_VIRT_CPUS][NR_EVENT_CHANNELS/LONG_BIT];
#define active_evtchns(cpu,sh,idx) \
((sh)->evtchn_pending[idx] & \
@ -220,7 +192,7 @@ evtchn_do_upcall(struct trapframe *frame)
shared_info_t *s;
vcpu_info_t *vcpu_info;
cpu = smp_processor_id();
cpu = PCPU_GET(cpuid);
s = HYPERVISOR_shared_info;
vcpu_info = &s->vcpu_info[cpu];
@ -236,7 +208,7 @@ evtchn_do_upcall(struct trapframe *frame)
while ((l2 = active_evtchns(cpu, s, l1i)) != 0) {
l2i = __ffs(l2);
port = (l1i * BITS_PER_LONG) + l2i;
port = (l1i * LONG_BIT) + l2i;
if ((irq = evtchn_to_irq[port]) != -1) {
struct intsrc *isrc = intr_lookup_source(irq);
/*
@ -258,7 +230,7 @@ ipi_pcpu(unsigned int cpu, int vector)
{
int irq;
irq = per_cpu(ipi_to_irq, cpu)[vector];
irq = PCPU_GET(ipi_to_irq[vector]);
notify_remote_via_irq(irq);
}
@ -310,11 +282,12 @@ bind_local_port_to_irq(unsigned int local_port)
mtx_lock_spin(&irq_mapping_update_lock);
PANIC_IF(evtchn_to_irq[local_port] != -1);
KASSERT(evtchn_to_irq[local_port] == -1,
("evtchn_to_irq inconsistent"));
if ((irq = find_unbound_irq()) < 0) {
struct evtchn_close close = { .port = local_port };
PANIC_IF(HYPERVISOR_event_channel_op(EVTCHNOP_close, &close));
HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
goto out;
}
@ -368,21 +341,20 @@ bind_virq_to_irq(unsigned int virq, unsigned int cpu)
mtx_lock_spin(&irq_mapping_update_lock);
if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) {
if ((irq = pcpu_find(cpu)->pc_virq_to_irq[virq]) == -1) {
if ((irq = find_unbound_irq()) < 0)
goto out;
bind_virq.virq = virq;
bind_virq.vcpu = cpu;
PANIC_IF(HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
&bind_virq) != 0);
HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq);
evtchn = bind_virq.port;
evtchn_to_irq[evtchn] = irq;
irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
per_cpu(virq_to_irq, cpu)[virq] = irq;
pcpu_find(cpu)->pc_virq_to_irq[virq] = irq;
bind_evtchn_to_cpu(evtchn, cpu);
}
@ -407,18 +379,18 @@ bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
mtx_lock_spin(&irq_mapping_update_lock);
if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) {
if ((irq = pcpu_find(cpu)->pc_ipi_to_irq[ipi]) == -1) {
if ((irq = find_unbound_irq()) < 0)
goto out;
bind_ipi.vcpu = cpu;
PANIC_IF(HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi) != 0);
HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi);
evtchn = bind_ipi.port;
evtchn_to_irq[evtchn] = irq;
irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
per_cpu(ipi_to_irq, cpu)[ipi] = irq;
pcpu_find(cpu)->pc_ipi_to_irq[ipi] = irq;
bind_evtchn_to_cpu(evtchn, cpu);
}
@ -432,24 +404,27 @@ bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
}
void
static void
unbind_from_irq(int irq)
{
struct evtchn_close close;
int evtchn = evtchn_from_irq(irq);
int cpu;
mtx_lock_spin(&irq_mapping_update_lock);
if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) {
close.port = evtchn;
PANIC_IF(HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0);
HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
switch (type_from_irq(irq)) {
case IRQT_VIRQ:
per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))[index_from_irq(irq)] = -1;
cpu = cpu_from_evtchn(evtchn);
pcpu_find(cpu)->pc_virq_to_irq[index_from_irq(irq)] = -1;
break;
case IRQT_IPI:
per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))[index_from_irq(irq)] = -1;
cpu = cpu_from_evtchn(evtchn);
pcpu_find(cpu)->pc_ipi_to_irq[index_from_irq(irq)] = -1;
break;
default:
break;
@ -467,11 +442,8 @@ unbind_from_irq(int irq)
int
bind_caller_port_to_irqhandler(unsigned int caller_port,
const char *devname,
driver_intr_t handler,
void *arg,
unsigned long irqflags,
unsigned int *irqp)
const char *devname, driver_intr_t handler, void *arg,
unsigned long irqflags, unsigned int *irqp)
{
unsigned int irq;
int error;
@ -493,13 +465,9 @@ bind_caller_port_to_irqhandler(unsigned int caller_port,
}
int
bind_listening_port_to_irqhandler(
unsigned int remote_domain,
const char *devname,
driver_intr_t handler,
void *arg,
unsigned long irqflags,
unsigned int *irqp)
bind_listening_port_to_irqhandler(unsigned int remote_domain,
const char *devname, driver_intr_t handler, void *arg,
unsigned long irqflags, unsigned int *irqp)
{
unsigned int irq;
int error;
@ -519,14 +487,10 @@ bind_listening_port_to_irqhandler(
}
int
bind_interdomain_evtchn_to_irqhandler(
unsigned int remote_domain,
unsigned int remote_port,
const char *devname,
driver_filter_t filter,
driver_intr_t handler,
unsigned long irqflags,
unsigned int *irqp)
bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
unsigned int remote_port, const char *devname,
driver_filter_t filter, driver_intr_t handler,
unsigned long irqflags, unsigned int *irqp)
{
unsigned int irq;
int error;
@ -546,14 +510,9 @@ bind_interdomain_evtchn_to_irqhandler(
}
int
bind_virq_to_irqhandler(unsigned int virq,
unsigned int cpu,
const char *devname,
driver_filter_t filter,
driver_intr_t handler,
void *arg,
unsigned long irqflags,
unsigned int *irqp)
bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
const char *devname, driver_filter_t filter, driver_intr_t handler,
unsigned long irqflags, unsigned int *irqp)
{
unsigned int irq;
int error;
@ -573,12 +532,9 @@ bind_virq_to_irqhandler(unsigned int virq,
}
int
bind_ipi_to_irqhandler(unsigned int ipi,
unsigned int cpu,
const char *devname,
driver_filter_t filter,
unsigned long irqflags,
unsigned int *irqp)
bind_ipi_to_irqhandler(unsigned int ipi, unsigned int cpu,
const char *devname, driver_filter_t filter,
unsigned long irqflags, unsigned int *irqp)
{
unsigned int irq;
int error;
@ -636,9 +592,9 @@ rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
}
static void set_affinity_irq(unsigned irq, xen_cpumask_t dest)
static void set_affinity_irq(unsigned irq, cpumask_t dest)
{
unsigned tcpu = first_cpu(dest);
unsigned tcpu = ffs(dest) - 1;
rebind_irq_to_cpu(irq, tcpu);
}
#endif
@ -656,13 +612,11 @@ static void xenpic_dynirq_enable_source(struct intsrc *isrc);
static void xenpic_dynirq_disable_source(struct intsrc *isrc, int);
static void xenpic_dynirq_eoi_source(struct intsrc *isrc);
static void xenpic_dynirq_enable_intr(struct intsrc *isrc);
static void xenpic_dynirq_disable_intr(struct intsrc *isrc);
static void xenpic_pirq_enable_source(struct intsrc *isrc);
static void xenpic_pirq_disable_source(struct intsrc *isrc, int);
static void xenpic_pirq_eoi_source(struct intsrc *isrc);
static void xenpic_pirq_enable_intr(struct intsrc *isrc);
static void xenpic_pirq_disable_intr(struct intsrc *isrc);
static int xenpic_vector(struct intsrc *isrc);
@ -677,7 +631,6 @@ struct pic xenpic_dynirq_template = {
.pic_disable_source = xenpic_dynirq_disable_source,
.pic_eoi_source = xenpic_dynirq_eoi_source,
.pic_enable_intr = xenpic_dynirq_enable_intr,
.pic_disable_intr = xenpic_dynirq_disable_intr,
.pic_vector = xenpic_vector,
.pic_source_pending = xenpic_source_pending,
.pic_suspend = xenpic_suspend,
@ -689,7 +642,6 @@ struct pic xenpic_pirq_template = {
.pic_disable_source = xenpic_pirq_disable_source,
.pic_eoi_source = xenpic_pirq_eoi_source,
.pic_enable_intr = xenpic_pirq_enable_intr,
.pic_disable_intr = xenpic_pirq_disable_intr,
.pic_vector = xenpic_vector,
.pic_source_pending = xenpic_source_pending,
.pic_suspend = xenpic_suspend,
@ -747,20 +699,6 @@ xenpic_dynirq_enable_intr(struct intsrc *isrc)
mtx_unlock_spin(&irq_mapping_update_lock);
}
static void
xenpic_dynirq_disable_intr(struct intsrc *isrc)
{
unsigned int irq;
struct xenpic_intsrc *xp;
xp = (struct xenpic_intsrc *)isrc;
mtx_lock_spin(&irq_mapping_update_lock);
xp->xp_masked = 1;
irq = xenpic_vector(isrc);
mask_evtchn(evtchn_from_irq(irq));
mtx_unlock_spin(&irq_mapping_update_lock);
}
static void
xenpic_dynirq_eoi_source(struct intsrc *isrc)
{
@ -825,7 +763,7 @@ notify_remote_via_irq(int irq)
if (VALID_EVTCHN(evtchn))
notify_remote_via_evtchn(evtchn);
else
panic("invalid evtchn");
panic("invalid evtchn %d", irq);
}
/* required for support of physical devices */
@ -898,32 +836,6 @@ xenpic_pirq_enable_intr(struct intsrc *isrc)
mtx_unlock_spin(&irq_mapping_update_lock);
}
static void
xenpic_pirq_disable_intr(struct intsrc *isrc)
{
unsigned int irq;
int evtchn;
struct evtchn_close close;
mtx_lock_spin(&irq_mapping_update_lock);
irq = xenpic_vector(isrc);
evtchn = evtchn_from_irq(irq);
if (!VALID_EVTCHN(evtchn))
goto done;
mask_evtchn(evtchn);
close.port = evtchn;
PANIC_IF(HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0);
bind_evtchn_to_cpu(evtchn, 0);
evtchn_to_irq[evtchn] = -1;
irq_info[irq] = IRQ_UNBOUND;
done:
mtx_unlock_spin(&irq_mapping_update_lock);
}
static void
xenpic_pirq_enable_source(struct intsrc *isrc)
{
@ -998,7 +910,7 @@ void
unmask_evtchn(int port)
{
shared_info_t *s = HYPERVISOR_shared_info;
unsigned int cpu = smp_processor_id();
unsigned int cpu = PCPU_GET(cpuid);
vcpu_info_t *vcpu_info = &s->vcpu_info[cpu];
/* Slow path (hypercall) if this is a non-local port. */
@ -1016,7 +928,7 @@ unmask_evtchn(int port)
* masked.
*/
if (synch_test_bit(port, &s->evtchn_pending) &&
!synch_test_and_set_bit(port / BITS_PER_LONG,
!synch_test_and_set_bit(port / LONG_BIT,
&vcpu_info->evtchn_pending_sel)) {
vcpu_info->evtchn_upcall_pending = 1;
if (!vcpu_info->evtchn_upcall_mask)
@ -1039,15 +951,21 @@ void irq_resume(void)
mask_evtchn(evtchn);
/* Check that no PIRQs are still bound. */
for (pirq = 0; pirq < NR_PIRQS; pirq++)
PANIC_IF(irq_info[pirq_to_irq(pirq)] != IRQ_UNBOUND);
for (pirq = 0; pirq < NR_PIRQS; pirq++) {
KASSERT(irq_info[pirq_to_irq(pirq)] == IRQ_UNBOUND,
("pirq_to_irq inconsistent"));
}
/* Secondary CPUs must have no VIRQ or IPI bindings. */
for (cpu = 1; cpu < NR_CPUS; cpu++) {
for (virq = 0; virq < NR_VIRQS; virq++)
PANIC_IF(per_cpu(virq_to_irq, cpu)[virq] != -1);
for (ipi = 0; ipi < NR_IPIS; ipi++)
PANIC_IF(per_cpu(ipi_to_irq, cpu)[ipi] != -1);
for (cpu = 1; cpu < MAX_VIRT_CPUS; cpu++) {
for (virq = 0; virq < NR_VIRQS; virq++) {
KASSERT(pcpu_find(cpu)->pc_virq_to_irq[virq] == -1,
("virq_to_irq inconsistent"));
}
for (ipi = 0; ipi < NR_IPIS; ipi++) {
KASSERT(pcpu_find(cpu)->pc_ipi_to_irq[ipi] == -1,
("ipi_to_irq inconsistent"));
}
}
/* No IRQ <-> event-channel mappings. */
@ -1058,15 +976,16 @@ void irq_resume(void)
/* Primary CPU: rebind VIRQs automatically. */
for (virq = 0; virq < NR_VIRQS; virq++) {
if ((irq = per_cpu(virq_to_irq, 0)[virq]) == -1)
if ((irq = pcpu_find(0)->pc_virq_to_irq[virq]) == -1)
continue;
PANIC_IF(irq_info[irq] != mk_irq_info(IRQT_VIRQ, virq, 0));
KASSERT(irq_info[irq] == mk_irq_info(IRQT_VIRQ, virq, 0),
("irq_info inconsistent"));
/* Get a new binding from Xen. */
bind_virq.virq = virq;
bind_virq.vcpu = 0;
PANIC_IF(HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq) != 0);
HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq);
evtchn = bind_virq.port;
/* Record the new mapping. */
@ -1079,15 +998,16 @@ void irq_resume(void)
/* Primary CPU: rebind IPIs automatically. */
for (ipi = 0; ipi < NR_IPIS; ipi++) {
if ((irq = per_cpu(ipi_to_irq, 0)[ipi]) == -1)
if ((irq = pcpu_find(0)->pc_ipi_to_irq[ipi]) == -1)
continue;
PANIC_IF(irq_info[irq] != mk_irq_info(IRQT_IPI, ipi, 0));
KASSERT(irq_info[irq] == mk_irq_info(IRQT_IPI, ipi, 0),
("irq_info inconsistent"));
/* Get a new binding from Xen. */
memset(&op, 0, sizeof(op));
bind_ipi.vcpu = 0;
PANIC_IF(HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi) != 0);
HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi);
evtchn = bind_ipi.port;
/* Record the new mapping. */
@ -1111,9 +1031,9 @@ evtchn_init(void *dummy __unused)
/* No VIRQ or IPI bindings. */
for (cpu = 0; cpu < mp_ncpus; cpu++) {
for (i = 0; i < NR_VIRQS; i++)
per_cpu(virq_to_irq, cpu)[i] = -1;
pcpu_find(cpu)->pc_virq_to_irq[i] = -1;
for (i = 0; i < NR_IPIS; i++)
per_cpu(ipi_to_irq, cpu)[i] = -1;
pcpu_find(cpu)->pc_ipi_to_irq[i] = -1;
}
/* No event-channel -> IRQ mappings. */

View File

@ -23,8 +23,6 @@ __FBSDID("$FreeBSD$");
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <machine/cpufunc.h>
#include <machine/intr_machdep.h>
#include <machine/xen/xen-os.h>
#include <xen/xen_intr.h>
#include <machine/bus.h>
@ -234,14 +232,14 @@ evtchn_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg,
__evtchn_reset_buffer_ring();
break;
case EVTCHN_BIND:
if ( !synch_test_and_set_bit((int)arg, &bound_ports[0]) )
unmask_evtchn((int)arg);
if ( !synch_test_and_set_bit((uintptr_t)arg, &bound_ports[0]) )
unmask_evtchn((uintptr_t)arg);
else
rc = EINVAL;
break;
case EVTCHN_UNBIND:
if ( synch_test_and_clear_bit((int)arg, &bound_ports[0]) )
mask_evtchn((int)arg);
if ( synch_test_and_clear_bit((uintptr_t)arg, &bound_ports[0]) )
mask_evtchn((uintptr_t)arg);
else
rc = EINVAL;
break;
@ -383,12 +381,12 @@ evtchn_dev_init(void *dummy __unused)
/* (DEVFS) automatically destroy the symlink with its destination. */
devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle);
#endif
printk("Event-channel device installed.\n");
if (bootverbose)
printf("Event-channel device installed.\n");
return 0;
}
SYSINIT(evtchn_dev_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, evtchn_dev_init, NULL);

View File

@ -1,10 +1,12 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <machine/xen/xen-os.h>
#include <xen/hypervisor.h>
#include <machine/xen/features.h>
#include <xen/features.h>
uint8_t xen_features[XENFEAT_NR_SUBMAPS * 32] /* __read_mostly */;

20
sys/xen/features.h Normal file
View File

@ -0,0 +1,20 @@
/******************************************************************************
* features.h
*
* Query the features reported by Xen.
*
* Copyright (c) 2006, Ian Campbell
*/
#ifndef __ASM_XEN_FEATURES_H__
#define __ASM_XEN_FEATURES_H__
#include <xen/interface/version.h>
extern void setup_xen_features(void);
extern uint8_t xen_features[XENFEAT_NR_SUBMAPS * 32];
#define xen_feature(flag) (xen_features[flag])
#endif /* __ASM_XEN_FEATURES_H__ */

View File

@ -25,29 +25,21 @@ __FBSDID("$FreeBSD$");
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>
#include <machine/xen/xen-os.h>
#include <xen/hypervisor.h>
#include <machine/xen/synch_bitops.h>
#include <xen/hypervisor.h>
#include <xen/gnttab.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#define cmpxchg(a, b, c) atomic_cmpset_int((volatile u_int *)(a),(b),(c))
#if 1
#define ASSERT(_p) \
if ( !(_p) ) { printk("Assertion '%s': line %d, file %s\n", \
#_p , __LINE__, __FILE__); *(int*)0=0; }
#else
#define ASSERT(_p) ((void)0)
#endif
#define WPRINTK(fmt, args...) \
printk("xen_grant: " fmt, ##args)
/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
#define GNTTAB_LIST_END 0xffffffff
@ -72,14 +64,14 @@ static int gnttab_expand(unsigned int req_entries);
static int
get_free_entries(int count, int *entries)
{
int ref, rc;
int ref, error;
grant_ref_t head;
mtx_lock(&gnttab_list_lock);
if ((gnttab_free_count < count) &&
((rc = gnttab_expand(count - gnttab_free_count)) != 0)) {
((error = gnttab_expand(count - gnttab_free_count)) != 0)) {
mtx_unlock(&gnttab_list_lock);
return (rc);
return (error);
}
ref = head = gnttab_free_head;
gnttab_free_count -= count;
@ -163,6 +155,7 @@ void
gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
unsigned long frame, int readonly)
{
shared[ref].frame = frame;
shared[ref].domid = domid;
wmb();
@ -213,7 +206,8 @@ gnttab_end_foreign_access(grant_ref_t ref, void *page)
}
int
gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn,
grant_ref_t *result)
{
int error, ref;
@ -223,7 +217,8 @@ gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
return (ref);
*result = ref;
return (0);
}
void
@ -261,7 +256,7 @@ gnttab_end_foreign_transfer_ref(grant_ref_t ref)
/* Read the frame number /after/ reading completion status. */
rmb();
frame = shared[ref].frame;
PANIC_IF(frame == 0);
KASSERT(frame != 0, ("grant table inconsistent"));
return (frame);
}
@ -320,6 +315,7 @@ gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head)
int
gnttab_empty_grant_references(const grant_ref_t *private_head)
{
return (*private_head == GNTTAB_LIST_END);
}
@ -331,20 +327,20 @@ gnttab_claim_grant_reference(grant_ref_t *private_head)
if (unlikely(g == GNTTAB_LIST_END))
return (ENOSPC);
*private_head = gnttab_entry(g);
return (g);
}
void
gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release)
{
gnttab_entry(release) = *private_head;
*private_head = release;
}
void
gnttab_request_free_callback(struct gnttab_free_callback *callback,
void (*fn)(void *), void *arg, uint16_t count)
void (*fn)(void *), void *arg, uint16_t count)
{
mtx_lock(&gnttab_list_lock);
@ -387,7 +383,8 @@ grow_gnttab_list(unsigned int more_frames)
for (i = nr_grant_frames; i < new_nr_grant_frames; i++)
{
gnttab_list[i] = (grant_ref_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
gnttab_list[i] = (grant_ref_t *)
malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
if (!gnttab_list[i])
goto grow_nomem;
@ -405,12 +402,12 @@ grow_gnttab_list(unsigned int more_frames)
check_free_callbacks();
return 0;
return (0);
grow_nomem:
for ( ; i >= nr_grant_frames; i--)
free(gnttab_list[i], M_DEVBUF);
return (-ENOMEM);
return (ENOMEM);
}
static unsigned int
@ -464,6 +461,8 @@ unmap_pte_fn(pte_t *pte, struct page *pmd_page,
}
#endif
#ifndef XENHVM
static int
gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
@ -486,50 +485,117 @@ gnttab_map(unsigned int start_idx, unsigned int end_idx)
free(frames, M_DEVBUF);
return (ENOSYS);
}
PANIC_IF(rc || setup.status);
KASSERT(!(rc || setup.status),
("unexpected result from grant_table_op"));
if (shared == NULL) {
vm_offset_t area;
area = kmem_alloc_nofault(kernel_map,
PAGE_SIZE * max_nr_grant_frames());
PANIC_IF(area == 0);
KASSERT(area, ("can't allocate VM space for grant table"));
shared = (grant_entry_t *)area;
}
for (i = 0; i < nr_gframes; i++)
PT_SET_MA(((caddr_t)shared) + i*PAGE_SIZE,
((vm_paddr_t)frames[i]) << PAGE_SHIFT | PG_RW | PG_V);
free(frames, M_DEVBUF);
return 0;
return (0);
}
int
gnttab_resume(void)
{
if (max_nr_grant_frames() < nr_grant_frames)
return -ENOSYS;
return gnttab_map(0, nr_grant_frames - 1);
return (ENOSYS);
return (gnttab_map(0, nr_grant_frames - 1));
}
int
gnttab_suspend(void)
{
int i, pages;
int i;
pages = (PAGE_SIZE*nr_grant_frames) >> PAGE_SHIFT;
for (i = 0; i < pages; i++)
PT_SET_MA(shared + (i*PAGE_SIZE), (vm_paddr_t)0);
for (i = 0; i < nr_grant_frames; i++)
pmap_kremove((vm_offset_t) shared + i * PAGE_SIZE);
return (0);
}
#else /* XENHVM */
#include <dev/xen/xenpci/xenpcivar.h>
static vm_paddr_t resume_frames;
/*
 * XENHVM variant: ask Xen to place grant table frames [start_idx,
 * end_idx] at guest physical addresses inside resume_frames, then map
 * them into the kernel VA window backing 'shared'.
 */
static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
	struct xen_add_to_physmap xatp;
	unsigned int i = end_idx;

	/*
	 * Loop backwards, so that the first hypercall has the largest index,
	 * ensuring that the table will grow only once.
	 */
	do {
		xatp.domid = DOMID_SELF;
		xatp.idx = i;
		xatp.space = XENMAPSPACE_grant_table;
		xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i;
		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
			panic("HYPERVISOR_memory_op failed to map gnttab");
	} while (i-- > start_idx);

	/* First call: reserve VA for the maximum possible table size. */
	if (shared == NULL) {
		vm_offset_t area;

		area = kmem_alloc_nofault(kernel_map,
		    PAGE_SIZE * max_nr_grant_frames());
		KASSERT(area, ("can't allocate VM space for grant table"));
		shared = (grant_entry_t *)area;
	}

	/* Wire each new frame into the VA window. */
	for (i = start_idx; i <= end_idx; i++) {
		pmap_kenter((vm_offset_t) shared + i * PAGE_SIZE,
		    resume_frames + i * PAGE_SIZE);
	}

	return (0);
}
/*
 * XENHVM variant: (re)map the grant table after boot or resume.  The
 * backing physical range is carved out of the xenpci MMIO region once
 * and reused on subsequent resumes.
 */
int
gnttab_resume(void)
{
	int error;
	unsigned int max_nr_gframes, nr_gframes;

	nr_gframes = nr_grant_frames;
	max_nr_gframes = max_nr_grant_frames();
	if (max_nr_gframes < nr_gframes)
		return (ENOSYS);

	/* First call only: reserve space for the largest possible table. */
	if (!resume_frames) {
		error = xenpci_alloc_space(PAGE_SIZE * max_nr_gframes,
		    &resume_frames);
		if (error) {
			printf("error mapping gnttab share frames\n");
			return (error);
		}
	}

	return (gnttab_map(0, nr_gframes - 1));
}
#endif
static int
gnttab_expand(unsigned int req_entries)
{
int rc;
int error;
unsigned int cur, extra;
cur = nr_grant_frames;
@ -538,10 +604,11 @@ gnttab_expand(unsigned int req_entries)
if (cur + extra > max_nr_grant_frames())
return (ENOSPC);
if ((rc = gnttab_map(cur, cur + extra - 1)) == 0)
rc = grow_gnttab_list(extra);
error = gnttab_map(cur, cur + extra - 1);
if (!error)
error = grow_gnttab_list(extra);
return rc;
return (error);
}
int
@ -552,7 +619,7 @@ gnttab_init()
unsigned int nr_init_grefs;
if (!is_running_on_xen())
return -ENODEV;
return (ENODEV);
nr_grant_frames = 1;
boot_max_nr_grant_frames = __max_nr_grant_frames();
@ -571,7 +638,8 @@ gnttab_init()
return (ENOMEM);
for (i = 0; i < nr_grant_frames; i++) {
gnttab_list[i] = (grant_ref_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
gnttab_list[i] = (grant_ref_t *)
malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
if (gnttab_list[i] == NULL)
goto ini_nomem;
}
@ -588,8 +656,10 @@ gnttab_init()
gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
gnttab_free_head = NR_RESERVED_ENTRIES;
printk("Grant table initialized\n");
return 0;
if (bootverbose)
printf("Grant table initialized\n");
return (0);
ini_nomem:
for (i--; i >= 0; i--)

View File

@ -36,10 +36,12 @@
#ifndef __ASM_GNTTAB_H__
#include <xen/interface/grant_table.h>
#include <xen/hypervisor.h>
#include <xen/interface/grant_table.h>
#include <machine/xen/xen-os.h>
#include <machine/xen/features.h>
#include <xen/features.h>
struct gnttab_free_callback {
struct gnttab_free_callback *next;
@ -50,6 +52,10 @@ struct gnttab_free_callback {
int gnttab_init(void);
/*
* Allocate a grant table reference and return it in *result. Returns
* zero on success or errno on error.
*/
int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
int flags, grant_ref_t *result);
@ -68,7 +74,7 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref);
*/
void gnttab_end_foreign_access(grant_ref_t ref, void *page);
int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn);
int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn, grant_ref_t *result);
unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref);
unsigned long gnttab_end_foreign_transfer(grant_ref_t ref);
@ -104,6 +110,10 @@ void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
int gnttab_suspend(void);
int gnttab_resume(void);
#if 0
#include <xen/features.h>
static inline void
gnttab_set_map_op(struct gnttab_map_grant_ref *map, vm_paddr_t addr,
uint32_t flags, grant_ref_t ref, domid_t domid)
@ -149,5 +159,6 @@ gnttab_set_replace_op(struct gnttab_unmap_and_replace *unmap, vm_paddr_t addr,
unmap->handle = handle;
}
#endif
#endif /* __ASM_GNTTAB_H__ */

View File

@ -8,11 +8,19 @@
* $FreeBSD$
*/
#ifndef __HYPERVISOR_H__
#define __HYPERVISOR_H__
#ifndef __XEN_HYPERVISOR_H__
#define __XEN_HYPERVISOR_H__
#ifdef XENHVM
#define is_running_on_xen() (HYPERVISOR_shared_info != NULL)
#else
#define is_running_on_xen() 1
#endif
#ifdef PAE
#ifndef CONFIG_X86_PAE
#define CONFIG_X86_PAE
@ -27,6 +35,7 @@
#include <xen/interface/physdev.h>
#include <xen/interface/sched.h>
#include <xen/interface/callback.h>
#include <xen/interface/memory.h>
#include <machine/xen/hypercall.h>
#if defined(__amd64__)
@ -131,7 +140,7 @@ MULTI_update_va_mapping(
mcl->op = __HYPERVISOR_update_va_mapping;
mcl->args[0] = va;
#if defined(__amd64__)
mcl->args[1] = new_val.pte;
mcl->args[1] = new_val;
#elif defined(PAE)
mcl->args[1] = (uint32_t)(new_val & 0xffffffff) ;
mcl->args[2] = (uint32_t)(new_val >> 32);
@ -142,4 +151,4 @@ MULTI_update_va_mapping(
mcl->args[MULTI_UVMFLAGS_INDEX] = flags;
}
#endif /* __HYPERVISOR_H__ */
#endif /* __XEN_HYPERVISOR_H__ */

View File

@ -32,7 +32,8 @@
#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
typedef struct { type *p; } __guest_handle_ ## name
#else
#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
#error "using old handle"
#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
typedef type * __guest_handle_ ## name
#endif
@ -50,7 +51,7 @@
#if defined(__i386__)
#include <xen/interface/arch-x86/xen-x86_32.h>
#elif defined(__x86_64__)
#include "xen-x86_64.h"
#include <xen/interface/arch-x86/xen-x86_64.h>
#endif
#ifndef __ASSEMBLY__

View File

@ -21,7 +21,7 @@
#ifndef __XEN_PUBLIC_HVM_PARAMS_H__
#define __XEN_PUBLIC_HVM_PARAMS_H__
#include "hvm_op.h"
#include <xen/interface/hvm/hvm_op.h>
/*
* Parameter space for HVMOP_{set,get}_param.

262
sys/xen/reboot.c Normal file
View File

@ -0,0 +1,262 @@
/*
*
* Copyright (c) 2004 Christian Limpach.
* Copyright (c) 2004-2006,2008 Kip Macy
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Christian Limpach.
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>
#include <machine/xen/xen-os.h>
#include <xen/hypervisor.h>
#include <xen/gnttab.h>
#include <xen/xen_intr.h>
#include <xen/xenbus/xenbusvar.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#ifdef XENHVM
#include <dev/xen/xenpci/xenpcivar.h>
#else
static void xen_suspend(void);
#endif
/*
 * Xenbus watch callback for the "control/shutdown" node.  The Xen
 * control tools write a request string ("reboot", "poweroff", "halt"
 * or "suspend") there; we consume it inside a xenstore transaction,
 * clear the node to acknowledge it, and act on the request.
 */
static void
shutdown_handler(struct xenbus_watch *watch,
		 const char **vec, unsigned int len)
{
	char *str;
	struct xenbus_transaction xbt;
	int error, howto;

	howto = 0;

 again:
	error = xenbus_transaction_start(&xbt);
	if (error)
		return;

	error = xenbus_read(xbt, "control", "shutdown", NULL, (void **) &str);

	/* Ignore read errors and empty reads. */
	if (error || strlen(str) == 0) {
		/*
		 * A successful read of an empty value still allocated
		 * the string; free it here, otherwise it is leaked on
		 * every watch firing with no pending request.
		 */
		if (error == 0)
			free(str, M_DEVBUF);
		xenbus_transaction_end(xbt, 1);
		return;
	}

	/* Acknowledge the request by clearing the node. */
	xenbus_write(xbt, "control", "shutdown", "");

	error = xenbus_transaction_end(xbt, 0);
	if (error == EAGAIN) {
		free(str, M_DEVBUF);
		goto again;
	}

	if (strcmp(str, "reboot") == 0)
		howto = 0;
	else if (strcmp(str, "poweroff") == 0)
		howto |= (RB_POWEROFF | RB_HALT);
	else if (strcmp(str, "halt") == 0)
#ifdef XENHVM
		/*
		 * We rely on acpi powerdown to halt the VM.
		 */
		howto |= (RB_POWEROFF | RB_HALT);
#else
		howto |= RB_HALT;
#endif
	else if (strcmp(str, "suspend") == 0)
		howto = -1;	/* sentinel: suspend instead of shutdown */
	else {
		printf("Ignoring shutdown request: %s\n", str);
		goto done;
	}

	if (howto == -1) {
		xen_suspend();
		goto done;
	}

	shutdown_nice(howto);
 done:
	free(str, M_DEVBUF);
}
#ifndef XENHVM
/*
 * In HV mode, we let acpi take care of halts and reboots.
 *
 * For PV kernels this final shutdown hook asks the hypervisor to
 * power off or reboot the domain, depending on the reboot flags.
 */
static void
xen_shutdown_final(void *arg, int howto)
{
	int reason;

	reason = (howto & (RB_HALT | RB_POWEROFF)) ?
	    SHUTDOWN_poweroff : SHUTDOWN_reboot;
	HYPERVISOR_shutdown(reason);
}
#endif
/* Fires shutdown_handler() whenever xenstore node "control/shutdown" changes. */
static struct xenbus_watch shutdown_watch = {
	.node = "control/shutdown",
	.callback = shutdown_handler
};
/*
 * Register the xenstore watch that listens for shutdown requests from
 * the control tools.  For PV (non-XENHVM) kernels, additionally hook
 * shutdown_final so halts/reboots are forwarded to the hypervisor.
 */
static void
setup_shutdown_watcher(void *unused)
{
	if (register_xenbus_watch(&shutdown_watch))
		printf("Failed to set shutdown watcher\n");
#ifndef XENHVM
	EVENTHANDLER_REGISTER(shutdown_final, xen_shutdown_final, NULL,
	    SHUTDOWN_PRI_LAST);
#endif
}

/* Install the watcher once pseudo-devices are up. */
SYSINIT(shutdown, SI_SUB_PSEUDO, SI_ORDER_ANY, setup_shutdown_watcher, NULL);
#ifndef XENHVM

extern void xencons_suspend(void);
extern void xencons_resume(void);

/*
 * Suspend a PV guest in preparation for save/restore or migration:
 * bind to CPU 0 and stop the other VCPUs, suspend devices, the
 * console and the grant tables, tear down the shared-info mapping,
 * then issue HYPERVISOR_suspend().  Execution resumes here after the
 * domain is restored, at which point everything is re-established in
 * reverse order.
 */
static void
xen_suspend()
{
	int i, j, k, fpp;
	unsigned long max_pfn, start_info_mfn;

#ifdef SMP
	cpumask_t map;
	/*
	 * Bind us to CPU 0 and stop any other VCPUs.
	 */
	mtx_lock_spin(&sched_lock);
	sched_bind(curthread, 0);
	mtx_unlock_spin(&sched_lock);
	KASSERT(PCPU_GET(cpuid) == 0, ("xen_suspend: not running on cpu 0"));

	map = PCPU_GET(other_cpus) & ~stopped_cpus;
	if (map)
		stop_cpus(map);
#endif

	if (DEVICE_SUSPEND(root_bus) != 0) {
		printf("xen_suspend: device_suspend failed\n");
#ifdef SMP
		/*
		 * 'map' only exists on SMP kernels; without this guard
		 * a non-SMP build fails to compile.
		 */
		if (map)
			restart_cpus(map);
#endif
		return;
	}

	local_irq_disable();

	xencons_suspend();
	gnttab_suspend();

	max_pfn = HYPERVISOR_shared_info->arch.max_pfn;

	/* Unmap shared info; the hypervisor remaps it on resume. */
	void *shared_info = HYPERVISOR_shared_info;
	HYPERVISOR_shared_info = NULL;
	pmap_kremove((vm_offset_t) shared_info);
	PT_UPDATES_FLUSH();

	/* Convert store/console MFNs to PFNs so they survive migration. */
	xen_start_info->store_mfn = MFNTOPFN(xen_start_info->store_mfn);
	xen_start_info->console.domU.mfn = MFNTOPFN(xen_start_info->console.domU.mfn);

	/*
	 * We'll stop somewhere inside this hypercall. When it returns,
	 * we'll start resuming after the restore.
	 */
	start_info_mfn = VTOMFN(xen_start_info);
	pmap_suspend();
	HYPERVISOR_suspend(start_info_mfn);
	pmap_resume();

	/* --- resume path: restore shared info and the P2M frame lists. --- */
	pmap_kenter_ma((vm_offset_t) shared_info, xen_start_info->shared_info);
	HYPERVISOR_shared_info = shared_info;

	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
		VTOMFN(xen_pfn_to_mfn_frame_list_list);

	/* Rebuild the two-level pfn-to-mfn frame list for the hypervisor. */
	fpp = PAGE_SIZE/sizeof(unsigned long);
	for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) {
		if ((j % fpp) == 0) {
			k++;
			xen_pfn_to_mfn_frame_list_list[k] =
				VTOMFN(xen_pfn_to_mfn_frame_list[k]);
			j = 0;
		}
		xen_pfn_to_mfn_frame_list[k][j] =
			VTOMFN(&xen_phys_machine[i]);
	}
	HYPERVISOR_shared_info->arch.max_pfn = max_pfn;

	gnttab_resume();
	irq_resume();
	local_irq_enable();
	xencons_resume();

#ifdef CONFIG_SMP
	/*
	 * NOTE(review): CONFIG_SMP is a Linux config symbol and is never
	 * defined in this tree, so this block is dead code; it likely
	 * should be "#ifdef SMP" -- confirm vcpu_prepare() exists in this
	 * port before enabling.
	 */
	for_each_cpu(i)
		vcpu_prepare(i);
#endif

	/*
	 * Only resume xenbus /after/ we've prepared our VCPUs; otherwise
	 * the VCPU hotplug callback can race with our vcpu_prepare
	 */
	DEVICE_RESUME(root_bus);

#ifdef SMP
	sched_unbind(curthread);
	if (map)
		restart_cpus(map);
#endif
}
#endif

View File

@ -29,37 +29,63 @@
#define dynirq_to_irq(_x) ((_x) + DYNIRQ_BASE)
#define irq_to_dynirq(_x) ((_x) - DYNIRQ_BASE)
/* Dynamic binding of event channels and VIRQ sources to Linux IRQ space. */
extern void unbind_from_irq(int irq);
/*
* Dynamic binding of event channels and VIRQ sources to guest IRQ space.
*/
/*
* Bind a caller port event channel to an interrupt handler. If
* successful, the guest IRQ number is returned in *irqp. Return zero
* on success or errno otherwise.
*/
extern int bind_caller_port_to_irqhandler(unsigned int caller_port,
const char *devname, driver_intr_t handler, void *arg,
unsigned long irqflags, unsigned int *irqp);
/*
* Bind a listening port to an interrupt handler. If successful, the
* guest IRQ number is returned in *irqp. Return zero on success or
* errno otherwise.
*/
extern int bind_listening_port_to_irqhandler(unsigned int remote_domain,
const char *devname, driver_intr_t handler, void *arg, unsigned long irqflags,
unsigned int *irqp);
const char *devname, driver_intr_t handler, void *arg,
unsigned long irqflags, unsigned int *irqp);
/*
* Bind a VIRQ to an interrupt handler. If successful, the guest IRQ
* number is returned in *irqp. Return zero on success or errno
* otherwise.
*/
extern int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
const char *devname, driver_filter_t filter, driver_intr_t handler,
void *arg, unsigned long irqflags, unsigned int *irqp);
extern int bind_ipi_to_irqhandler(unsigned int ipi,
unsigned int cpu,
const char *devname,
driver_filter_t handler,
unsigned long irqflags,
unsigned int *irqp);
/*
* Bind an IPI to an interrupt handler. If successful, the guest
* IRQ number is returned in *irqp. Return zero on success or errno
* otherwise.
*/
extern int bind_ipi_to_irqhandler(unsigned int ipi, unsigned int cpu,
const char *devname, driver_filter_t filter,
unsigned long irqflags, unsigned int *irqp);
/*
* Bind an interdomain event channel to an interrupt handler. If
* successful, the guest IRQ number is returned in *irqp. Return zero
* on success or errno otherwise.
*/
extern int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
unsigned int remote_port,
const char *devname,
driver_filter_t filter,
driver_intr_t handler,
unsigned long irqflags,
unsigned int *irqp);
unsigned int remote_port, const char *devname,
driver_filter_t filter, driver_intr_t handler,
unsigned long irqflags, unsigned int *irqp);
/*
* Unbind an interrupt handler using the guest IRQ number returned
* when it was bound.
*/
extern void unbind_from_irqhandler(unsigned int irq);
extern void unbind_from_irqhandler(unsigned int evtchn);
static __inline__ int irq_cannonicalize(int irq)
static __inline__ int irq_cannonicalize(unsigned int irq)
{
return (irq == 2) ? 9 : irq;
}

View File

@ -565,7 +565,6 @@ xenbus_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
return (ENOENT);
}
SYSCTL_DECL(_dev);
SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD, NULL, "Xen");
SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xen_store_evtchn, 0, "");
SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, "");

View File

@ -142,21 +142,17 @@ xs_read_reply(enum xsd_sockmsg_type *type, unsigned int *len, void **result)
mtx_lock(&xs_state.reply_lock);
while (TAILQ_EMPTY(&xs_state.reply_list)) {
while (TAILQ_EMPTY(&xs_state.reply_list)) {
error = mtx_sleep(&xs_state.reply_waitq,
&xs_state.reply_lock,
PCATCH, "xswait", hz/10);
if (error && error != EWOULDBLOCK) {
mtx_unlock(&xs_state.reply_lock);
return (error);
}
while (TAILQ_EMPTY(&xs_state.reply_list)) {
error = mtx_sleep(&xs_state.reply_waitq,
&xs_state.reply_lock,
PCATCH, "xswait", hz/10);
if (error && error != EWOULDBLOCK) {
mtx_unlock(&xs_state.reply_lock);
return (error);
}
}
}
msg = TAILQ_FIRST(&xs_state.reply_list);
TAILQ_REMOVE(&xs_state.reply_list, msg, list);
@ -202,7 +198,8 @@ xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void **result)
sx_xlock(&xs_state.request_mutex);
error = xb_write(msg, sizeof(*msg) + msg->len, &xs_state.request_mutex.lock_object);
error = xb_write(msg, sizeof(*msg) + msg->len,
&xs_state.request_mutex.lock_object);
if (error) {
msg->type = XS_ERROR;
} else {
@ -243,7 +240,8 @@ xs_talkv(struct xenbus_transaction t, enum xsd_sockmsg_type type,
sx_xlock(&xs_state.request_mutex);
error = xb_write(&msg, sizeof(msg), &xs_state.request_mutex.lock_object);
error = xb_write(&msg, sizeof(msg),
&xs_state.request_mutex.lock_object);
if (error) {
sx_xunlock(&xs_state.request_mutex);
printf("xs_talkv failed %d\n", error);
@ -251,7 +249,8 @@ xs_talkv(struct xenbus_transaction t, enum xsd_sockmsg_type type,
}
for (i = 0; i < num_vecs; i++) {
error = xb_write(iovec[i].iov_base, iovec[i].iov_len, &xs_state.request_mutex.lock_object);
error = xb_write(iovec[i].iov_base, iovec[i].iov_len,
&xs_state.request_mutex.lock_object);
if (error) {
sx_xunlock(&xs_state.request_mutex);
printf("xs_talkv failed %d\n", error);
@ -791,7 +790,8 @@ xs_process_msg(enum xsd_sockmsg_type *type)
msg = malloc(sizeof(*msg), M_DEVBUF, M_WAITOK);
mtx_lock(&xs_state.reply_lock);
error = xb_read(&msg->hdr, sizeof(msg->hdr), &xs_state.reply_lock.lock_object);
error = xb_read(&msg->hdr, sizeof(msg->hdr),
&xs_state.reply_lock.lock_object);
mtx_unlock(&xs_state.reply_lock);
if (error) {
free(msg, M_DEVBUF);
@ -800,7 +800,8 @@ xs_process_msg(enum xsd_sockmsg_type *type)
body = malloc(msg->hdr.len + 1, M_DEVBUF, M_WAITOK);
mtx_lock(&xs_state.reply_lock);
error = xb_read(body, msg->hdr.len, &xs_state.reply_lock.lock_object);
error = xb_read(body, msg->hdr.len,
&xs_state.reply_lock.lock_object);
mtx_unlock(&xs_state.reply_lock);
if (error) {
free(body, M_DEVBUF);