Konstantin Belousov bd50262f70 PTI for amd64.
The implementation of the Kernel Page Table Isolation (KPTI) for
amd64, first version. It provides a workaround for the 'meltdown'
vulnerability.  PTI is turned off by default for now, enable with the
loader tunable vm.pmap.pti=1.

The pmap page table is split into kernel-mode table and user-mode
table. Kernel-mode table is identical to the non-PTI table, while
usermode table is obtained from kernel table by leaving userspace
mappings intact, but only leaving the following parts of the kernel
mapped:

    kernel text (but not modules text)
    PCPU
    GDT/IDT/user LDT/task structures
    IST stacks for NMI and doublefault handlers.

Kernel switches to user page table before returning to usermode, and
restores full kernel page table on the entry. Initial kernel-mode
stack for PTI trampoline is allocated in PCPU, it is only 16
qwords.  Kernel entry trampoline switches page tables, then the
hardware trap frame is copied to the normal kstack, and execution
continues.

IST stacks are kept mapped and no trampoline is needed for
NMI/doublefault, but of course page table switch is performed.

On return to usermode, the trampoline is used again, iret frame is
copied to the trampoline stack, page tables are switched and iretq is
executed.  The case of iretq faulting due to the invalid usermode
context is tricky, since the frame for fault is appended to the
trampoline frame.  Besides copying the fault frame and original
(corrupted) frame to kstack, the fault frame must be patched to make
it look as if the fault occurred on the kstack, see the comment in
doret_iret detection code in trap().

Currently kernel pages which are mapped during trampoline operation
are identical for all pmaps.  They are registered using
pmap_pti_add_kva().  Besides initial registrations done during boot,
LDT and non-common TSS segments are registered if user requested their
use.  In principle, they can be installed into kernel page table per
pmap with some work.  Similarly, PCPU can be hidden from userspace
mapping using trampoline PCPU page, but again I do not see much
benefits besides complexity.

PDPE pages for the kernel half of the user page tables are
pre-allocated during boot because we need to know pml4 entries which
are copied to the top-level paging structure page, in advance on a new
pmap creation.  I enforce this to avoid iterating over all
existing pmaps if a new PDPE page is needed for PTI kernel mappings.
The iteration is a known problematic operation on i386.

The need to flush hidden kernel translations on the switch to user
mode makes global tables (PG_G) meaningless and even harmful, so PG_G
use is disabled for PTI case.  Our existing use of PCID is
incompatible with PTI and is automatically disabled if PTI is
enabled.  PCID can be forced on only for developer's benefit.

MCE is known to be broken, it requires IST stack to operate completely
correctly even for non-PTI case, and absolutely needs dedicated IST
stack because MCE delivery while the trampoline has not yet switched from the PTI
stack is fatal.  The fix is pending.

Reviewed by:	markj (partially)
Tested by:	pho (previous version)
Discussed with:	jeff, jhb
Sponsored by:	The FreeBSD Foundation
MFC after:	2 weeks
2018-01-17 11:44:21 +00:00

1554 lines
38 KiB
C

/*-
* Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* VM Bus Driver Implementation
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <machine/bus.h>
#include <machine/intr_machdep.h>
#include <machine/md_var.h>
#include <machine/resource.h>
#include <x86/include/apicvar.h>
#include <contrib/dev/acpica/include/acpi.h>
#include <dev/acpica/acpivar.h>
#include <dev/hyperv/include/hyperv.h>
#include <dev/hyperv/include/vmbus_xact.h>
#include <dev/hyperv/vmbus/hyperv_reg.h>
#include <dev/hyperv/vmbus/hyperv_var.h>
#include <dev/hyperv/vmbus/vmbus_reg.h>
#include <dev/hyperv/vmbus/vmbus_var.h>
#include <dev/hyperv/vmbus/vmbus_chanvar.h>
#include "acpi_if.h"
#include "pcib_if.h"
#include "vmbus_if.h"
/*
* NOTE(review): presumably the initial value of the GPADL counter that
* vmbus_gpadl_alloc() increments; its use is not visible here -- confirm.
*/
#define VMBUS_GPADL_START 0xe1e10
/*
* Message hypercall context.  It pairs the transaction that carries a
* post-message hypercall with a saved copy of the hypercall input.
*/
struct vmbus_msghc {
/* Transaction backing this message hypercall. */
struct vmbus_xact *mh_xact;
/*
* Copy of the hypercall input taken by vmbus_msghc_exec_noresult(); it
* is copied back into the input before each retry.
*/
struct hypercall_postmsg_in mh_inprm_save;
};
/*
* Forward declarations for the file-local functions.
*/
/* Device and bus interface methods (see vmbus_methods[]). */
static void vmbus_identify(driver_t *, device_t);
static int vmbus_probe(device_t);
static int vmbus_attach(device_t);
static int vmbus_detach(device_t);
static int vmbus_read_ivar(device_t, device_t, int,
uintptr_t *);
static int vmbus_child_pnpinfo_str(device_t, device_t,
char *, size_t);
static struct resource *vmbus_alloc_resource(device_t dev,
device_t child, int type, int *rid,
rman_res_t start, rman_res_t end,
rman_res_t count, u_int flags);
/* pcib interface methods; each forwards to the parent of 'bus'. */
static int vmbus_alloc_msi(device_t bus, device_t dev,
int count, int maxcount, int *irqs);
static int vmbus_release_msi(device_t bus, device_t dev,
int count, int *irqs);
static int vmbus_alloc_msix(device_t bus, device_t dev,
int *irq);
static int vmbus_release_msix(device_t bus, device_t dev,
int irq);
static int vmbus_map_msi(device_t bus, device_t dev,
int irq, uint64_t *addr, uint32_t *data);
/* vmbus interface methods. */
static uint32_t vmbus_get_version_method(device_t, device_t);
static int vmbus_probe_guid_method(device_t, device_t,
const struct hyperv_guid *);
static uint32_t vmbus_get_vcpu_id_method(device_t bus,
device_t dev, int cpu);
static struct taskqueue *vmbus_get_eventtq_method(device_t, device_t,
int);
#ifdef EARLY_AP_STARTUP
static void vmbus_intrhook(void *);
#endif
/* Connection, channel scan, message, SynIC, DMA and interrupt helpers. */
static int vmbus_init(struct vmbus_softc *);
static int vmbus_connect(struct vmbus_softc *, uint32_t);
static int vmbus_req_channels(struct vmbus_softc *sc);
static void vmbus_disconnect(struct vmbus_softc *);
static int vmbus_scan(struct vmbus_softc *);
static void vmbus_scan_teardown(struct vmbus_softc *);
static void vmbus_scan_done(struct vmbus_softc *,
const struct vmbus_message *);
static void vmbus_chanmsg_handle(struct vmbus_softc *,
const struct vmbus_message *);
static void vmbus_msg_task(void *, int);
static void vmbus_synic_setup(void *);
static void vmbus_synic_teardown(void *);
static int vmbus_sysctl_version(SYSCTL_HANDLER_ARGS);
static int vmbus_dma_alloc(struct vmbus_softc *);
static void vmbus_dma_free(struct vmbus_softc *);
static int vmbus_intr_setup(struct vmbus_softc *);
static void vmbus_intr_teardown(struct vmbus_softc *);
static int vmbus_doattach(struct vmbus_softc *);
static void vmbus_event_proc_dummy(struct vmbus_softc *,
int);
/* Softc pointer returned by vmbus_get_softc(). */
static struct vmbus_softc *vmbus_sc;
SYSCTL_NODE(_hw, OID_AUTO, vmbus, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
"Hyper-V vmbus");
/*
* When non-zero, vmbus_intr_setup() starts each per-CPU event taskqueue
* thread with a CPU set containing only that CPU.
*/
static int vmbus_pin_evttask = 1;
SYSCTL_INT(_hw_vmbus, OID_AUTO, pin_evttask, CTLFLAG_RDTUN,
&vmbus_pin_evttask, 0, "Pin event tasks to their respective CPU");
/*
* Interrupt entry points; vmbus_intr_setup() installs vmbus_isr_pti when
* 'pti' is set and vmbus_isr otherwise.
*/
extern inthand_t IDTVEC(vmbus_isr), IDTVEC(vmbus_isr_pti);
/*
* Protocol versions attempted by vmbus_init(), in array order; the first
* one for which vmbus_connect() succeeds is used.
*/
static const uint32_t vmbus_version[] = {
VMBUS_VERSION_WIN8_1,
VMBUS_VERSION_WIN8,
VMBUS_VERSION_WIN7,
VMBUS_VERSION_WS2008
};
/*
* Handlers looked up by channel message type in vmbus_chanmsg_handle();
* types without an entry are left NULL and are skipped there.
*/
static const vmbus_chanmsg_proc_t
vmbus_chanmsg_handlers[VMBUS_CHANMSG_TYPE_MAX] = {
VMBUS_CHANMSG_PROC(CHOFFER_DONE, vmbus_scan_done),
VMBUS_CHANMSG_PROC_WAKEUP(CONNECT_RESP)
};
/*
* Method table for the vmbus driver: device, bus, pcib and vmbus
* interface entries.  Entries not implemented in this file use the
* bus_generic_* implementations.
*/
static device_method_t vmbus_methods[] = {
/* Device interface */
DEVMETHOD(device_identify, vmbus_identify),
DEVMETHOD(device_probe, vmbus_probe),
DEVMETHOD(device_attach, vmbus_attach),
DEVMETHOD(device_detach, vmbus_detach),
DEVMETHOD(device_shutdown, bus_generic_shutdown),
DEVMETHOD(device_suspend, bus_generic_suspend),
DEVMETHOD(device_resume, bus_generic_resume),
/* Bus interface */
DEVMETHOD(bus_add_child, bus_generic_add_child),
DEVMETHOD(bus_print_child, bus_generic_print_child),
DEVMETHOD(bus_read_ivar, vmbus_read_ivar),
DEVMETHOD(bus_child_pnpinfo_str, vmbus_child_pnpinfo_str),
DEVMETHOD(bus_alloc_resource, vmbus_alloc_resource),
DEVMETHOD(bus_release_resource, bus_generic_release_resource),
DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
#if __FreeBSD_version >= 1100000
DEVMETHOD(bus_get_cpus, bus_generic_get_cpus),
#endif
/* pcib interface */
DEVMETHOD(pcib_alloc_msi, vmbus_alloc_msi),
DEVMETHOD(pcib_release_msi, vmbus_release_msi),
DEVMETHOD(pcib_alloc_msix, vmbus_alloc_msix),
DEVMETHOD(pcib_release_msix, vmbus_release_msix),
DEVMETHOD(pcib_map_msi, vmbus_map_msi),
/* Vmbus interface */
DEVMETHOD(vmbus_get_version, vmbus_get_version_method),
DEVMETHOD(vmbus_probe_guid, vmbus_probe_guid_method),
DEVMETHOD(vmbus_get_vcpu_id, vmbus_get_vcpu_id_method),
DEVMETHOD(vmbus_get_event_taskq, vmbus_get_eventtq_method),
DEVMETHOD_END
};
/* Driver description: name, method table and per-device softc size. */
static driver_t vmbus_driver = {
"vmbus",
vmbus_methods,
sizeof(struct vmbus_softc)
};
static devclass_t vmbus_devclass;
/* The driver is registered under both the pcib and acpi_syscontainer buses. */
DRIVER_MODULE(vmbus, pcib, vmbus_driver, vmbus_devclass, NULL, NULL);
DRIVER_MODULE(vmbus, acpi_syscontainer, vmbus_driver, vmbus_devclass,
NULL, NULL);
MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
MODULE_DEPEND(vmbus, pci, 1, 1, 1);
MODULE_VERSION(vmbus, 1);
/*
 * Return the file-scope vmbus softc pointer.
 */
static __inline struct vmbus_softc *
vmbus_get_softc(void)
{
	struct vmbus_softc *softc = vmbus_sc;

	return (softc);
}
/*
 * Reinitialize the hypercall input of 'mh' for a channel message that
 * carries 'dsize' bytes of data.
 *
 * Panics if 'dsize' is larger than HYPERCALL_POSTMSGIN_DSIZE_MAX.
 */
void
vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize)
{
	struct hypercall_postmsg_in *param;

	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
		panic("invalid data size %zu", dsize);

	param = vmbus_xact_req_data(mh->mh_xact);

	/* Start from an all-zero input, then fill in the fixed header. */
	memset(param, 0, HYPERCALL_POSTMSGIN_SIZE);
	param->hc_dsize = dsize;
	param->hc_msgtype = HYPERV_MSGTYPE_CHANNEL;
	param->hc_connid = VMBUS_CONNID_MESSAGE;
}
/*
 * Obtain a message hypercall context able to carry 'dsize' bytes of
 * message data, with its hypercall input already reset.
 *
 * Returns NULL if no transaction could be obtained.  Panics if 'dsize'
 * is larger than HYPERCALL_POSTMSGIN_DSIZE_MAX.
 */
struct vmbus_msghc *
vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize)
{
	struct vmbus_xact *xact;
	struct vmbus_msghc *msghc;

	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
		panic("invalid data size %zu", dsize);

	/* Request size covers the hypercall header plus the data. */
	xact = vmbus_xact_get(sc->vmbus_xc,
	    dsize + __offsetof(struct hypercall_postmsg_in, hc_data[0]));
	if (xact == NULL)
		return (NULL);

	msghc = vmbus_xact_priv(xact, sizeof(*msghc));
	msghc->mh_xact = xact;
	vmbus_msghc_reset(msghc, dsize);

	return (msghc);
}
/*
 * Release the transaction behind 'mh'.  'sc' is unused.
 */
void
vmbus_msghc_put(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
{
	struct vmbus_xact *xact = mh->mh_xact;

	vmbus_xact_put(xact);
}
/*
 * Return a pointer to the message data area (hc_data) inside the
 * hypercall input of 'mh'.
 */
void *
vmbus_msghc_dataptr(struct vmbus_msghc *mh)
{
	struct hypercall_postmsg_in *param = vmbus_xact_req_data(mh->mh_xact);

	return (param->hc_data);
}
/*
 * Post the message described by 'mh' without activating the transaction,
 * i.e. no response is waited for.
 *
 * The hypercall is attempted up to HC_RETRY_MAX times.  After each
 * failure the thread sleeps; the sleep starts at 1ms and doubles while
 * it is below 2 seconds.
 *
 * Returns 0 on success, EIO if every attempt failed.
 */
int
vmbus_msghc_exec_noresult(struct vmbus_msghc *mh)
{
	struct hypercall_postmsg_in *param;
	bus_addr_t param_paddr;
	sbintime_t delay;
	uint64_t status;
	int attempt;

	param = vmbus_xact_req_data(mh->mh_xact);
	param_paddr = vmbus_xact_req_paddr(mh->mh_xact);

	/*
	 * Keep a copy of the input so that it can be put back before a
	 * retry.
	 *
	 * XXX
	 * It is unclear whether the Hypercall can overwrite its input
	 * at all, i.e. whether this copy is really needed.
	 */
	memcpy(&mh->mh_inprm_save, param, HYPERCALL_POSTMSGIN_SIZE);

	/*
	 * Transient failures, e.g. the host being short of resources,
	 * are handled by retrying the Hypercall.  20 retries seem
	 * sufficient.
	 */
#define HC_RETRY_MAX	20
	delay = SBT_1MS;
	attempt = 0;
	while (attempt < HC_RETRY_MAX) {
		status = hypercall_post_message(param_paddr);
		if (status == HYPERCALL_STATUS_SUCCESS)
			return (0);

		pause_sbt("hcpmsg", delay, 0, C_HARDCLOCK);
		if (delay < SBT_1S * 2)
			delay *= 2;

		/* Put the saved input back for the next attempt. */
		memcpy(param, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE);
		attempt++;
	}
#undef HC_RETRY_MAX

	return (EIO);
}
/*
 * Activate the transaction of 'mh' and post its message.  If posting
 * fails the transaction is deactivated again.
 *
 * Returns 0 on success or the error from vmbus_msghc_exec_noresult().
 */
int
vmbus_msghc_exec(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
{
	int rc;

	vmbus_xact_activate(mh->mh_xact);

	rc = vmbus_msghc_exec_noresult(mh);
	if (rc != 0)
		vmbus_xact_deactivate(mh->mh_xact);

	return (rc);
}
/*
 * Deactivate the transaction of 'mh'.  'sc' is unused.
 */
void
vmbus_msghc_exec_cancel(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
{
	struct vmbus_xact *xact = mh->mh_xact;

	vmbus_xact_deactivate(xact);
}
/*
 * Return the result of vmbus_xact_wait() on the transaction of 'mh' as a
 * vmbus message.  The response length reported by vmbus_xact_wait() is
 * discarded.
 */
const struct vmbus_message *
vmbus_msghc_wait_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
{
	const struct vmbus_message *resp;
	size_t len;

	resp = vmbus_xact_wait(mh->mh_xact, &len);
	return (resp);
}
/*
 * Return the result of vmbus_xact_poll() on the transaction of 'mh' as a
 * vmbus message.  The response length reported by vmbus_xact_poll() is
 * discarded.
 */
const struct vmbus_message *
vmbus_msghc_poll_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
{
	const struct vmbus_message *resp;
	size_t len;

	resp = vmbus_xact_poll(mh->mh_xact, &len);
	return (resp);
}
void
vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg)
{
vmbus_xact_ctx_wakeup(sc->vmbus_xc, msg, sizeof(*msg));
}
/*
 * Allocate a GPADL id by atomically post-incrementing sc->vmbus_gpadl.
 * The value 0 is never returned; if the counter yields 0 the fetch is
 * repeated.
 */
uint32_t
vmbus_gpadl_alloc(struct vmbus_softc *sc)
{
	uint32_t id;

	do {
		id = atomic_fetchadd_int(&sc->vmbus_gpadl, 1);
	} while (id == 0);

	return (id);
}
/*
 * Send a CONNECT channel message requesting protocol 'version' and wait
 * for the response.
 *
 * Returns 0 if the response has chm_done set, EOPNOTSUPP if it does
 * not, ENXIO if no message hypercall context is available, or the error
 * from vmbus_msghc_exec().
 */
static int
vmbus_connect(struct vmbus_softc *sc, uint32_t version)
{
	const struct vmbus_chanmsg_connect_resp *resp;
	const struct vmbus_message *msg;
	struct vmbus_chanmsg_connect *req;
	struct vmbus_msghc *mh;
	int error;

	mh = vmbus_msghc_get(sc, sizeof(*req));
	if (mh == NULL)
		return (ENXIO);

	req = vmbus_msghc_dataptr(mh);
	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CONNECT;
	req->chm_ver = version;
	req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr;
	req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr;
	req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr;

	error = vmbus_msghc_exec(sc, mh);
	if (error == 0) {
		/* Read the response before the context is released. */
		msg = vmbus_msghc_wait_result(sc, mh);
		resp = (const struct vmbus_chanmsg_connect_resp *)
		    msg->msg_data;
		if (!resp->chm_done)
			error = EOPNOTSUPP;
	}
	vmbus_msghc_put(sc, mh);

	return (error);
}
/*
 * Negotiate the protocol version: try each entry of vmbus_version[] in
 * order and record the first one that vmbus_connect() accepts.
 *
 * Returns 0 on success, ENXIO if no version could be connected.
 */
static int
vmbus_init(struct vmbus_softc *sc)
{
	int idx;

	for (idx = 0; idx < nitems(vmbus_version); ++idx) {
		if (vmbus_connect(sc, vmbus_version[idx]) != 0)
			continue;

		sc->vmbus_version = vmbus_version[idx];
		device_printf(sc->vmbus_dev, "version %u.%u\n",
		    VMBUS_VERSION_MAJOR(sc->vmbus_version),
		    VMBUS_VERSION_MINOR(sc->vmbus_version));
		return (0);
	}

	return (ENXIO);
}
/*
 * Send a DISCONNECT channel message without waiting for a response.
 * Failures are only reported with device_printf().
 */
static void
vmbus_disconnect(struct vmbus_softc *sc)
{
	struct vmbus_chanmsg_disconnect *req;
	struct vmbus_msghc *mh;
	int rc;

	mh = vmbus_msghc_get(sc, sizeof(*req));
	if (mh == NULL) {
		device_printf(sc->vmbus_dev,
		    "can not get msg hypercall for disconnect\n");
		return;
	}

	req = vmbus_msghc_dataptr(mh);
	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_DISCONNECT;

	rc = vmbus_msghc_exec_noresult(mh);
	vmbus_msghc_put(sc, mh);

	if (rc != 0)
		device_printf(sc->vmbus_dev,
		    "disconnect msg hypercall failed\n");
}
/*
 * Send a CHREQUEST channel message without waiting for a response.
 *
 * Returns 0 on success, ENXIO if no message hypercall context is
 * available, or the error from vmbus_msghc_exec_noresult().
 */
static int
vmbus_req_channels(struct vmbus_softc *sc)
{
	struct vmbus_chanmsg_chrequest *req;
	struct vmbus_msghc *mh;
	int rc;

	mh = vmbus_msghc_get(sc, sizeof(*req));
	if (mh == NULL)
		return (ENXIO);

	req = vmbus_msghc_dataptr(mh);
	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHREQUEST;

	rc = vmbus_msghc_exec_noresult(mh);
	vmbus_msghc_put(sc, mh);

	return (rc);
}
/*
 * Task body: set sc->vmbus_scandone under Giant and wake up sleepers on
 * that flag (vmbus_scan() sleeps on it).
 */
static void
vmbus_scan_done_task(void *xsc, int pending __unused)
{
	struct vmbus_softc *softc;

	softc = xsc;

	mtx_lock(&Giant);
	softc->vmbus_scandone = true;
	mtx_unlock(&Giant);

	wakeup(&softc->vmbus_scandone);
}
/*
 * CHOFFER_DONE handler: queue the scan-done task on the device
 * taskqueue.  The message itself is not examined.
 */
static void
vmbus_scan_done(struct vmbus_softc *sc,
    const struct vmbus_message *msg __unused)
{
	struct task *done_task = &sc->vmbus_scandone_task;

	taskqueue_enqueue(sc->vmbus_devtq, done_task);
}
/*
 * Attach the non-channel children, create the device and sub-channel
 * taskqueues, request channel offers and sleep until the scan-done flag
 * is set.
 *
 * Must be called with Giant held.  Returns 0 on success or the error
 * from vmbus_req_channels(); in the error case the taskqueues created
 * here are left in place.
 */
static int
vmbus_scan(struct vmbus_softc *sc)
{
	int rc;

	/*
	 * Non-channel devices: identify, probe and attach.
	 */
	bus_generic_probe(sc->vmbus_dev);
	bus_generic_attach(sc->vmbus_dev);

	/*
	 * Attach and detach of vmbus devices, driven by channel offer
	 * and rescind messages, are serialized on this taskqueue.
	 */
	sc->vmbus_devtq = taskqueue_create("vmbus dev", M_WAITOK,
	    taskqueue_thread_enqueue, &sc->vmbus_devtq);
	taskqueue_start_threads(&sc->vmbus_devtq, 1, PI_NET, "vmbusdev");
	TASK_INIT(&sc->vmbus_scandone_task, 0, vmbus_scan_done_task, sc);

	/*
	 * Sub-channel detach runs on its own taskqueue, so that a device
	 * detach running in vmbus_devtq is able to drain its
	 * sub-channels.
	 */
	sc->vmbus_subchtq = taskqueue_create("vmbus subch", M_WAITOK,
	    taskqueue_thread_enqueue, &sc->vmbus_subchtq);
	taskqueue_start_threads(&sc->vmbus_subchtq, 1, PI_NET, "vmbussch");

	/*
	 * Kick off the scan.
	 */
	rc = vmbus_req_channels(sc);
	if (rc != 0) {
		device_printf(sc->vmbus_dev, "channel request failed: %d\n",
		    rc);
		return (rc);
	}

	/*
	 * Sleep until every device from the initial channel offers has
	 * been attached.
	 */
	GIANT_REQUIRED;
	for (;;) {
		if (sc->vmbus_scandone)
			break;
		mtx_sleep(&sc->vmbus_scandone, &Giant, 0, "vmbusdev", 0);
	}

	if (bootverbose)
		device_printf(sc->vmbus_dev, "device scan, probe and attach "
		    "done\n");

	return (0);
}
/*
 * Free the taskqueue at '*tqp', if any, and clear the pointer.  Giant
 * is dropped around taskqueue_free() and re-taken before the pointer is
 * cleared.
 */
static void
vmbus_scan_free_tq(struct taskqueue **tqp)
{
	if (*tqp == NULL)
		return;

	mtx_unlock(&Giant);
	taskqueue_free(*tqp);
	mtx_lock(&Giant);
	*tqp = NULL;
}

/*
 * Free the device and sub-channel taskqueues created by vmbus_scan().
 * Must be called with Giant held.
 */
static void
vmbus_scan_teardown(struct vmbus_softc *sc)
{
	GIANT_REQUIRED;

	vmbus_scan_free_tq(&sc->vmbus_devtq);
	vmbus_scan_free_tq(&sc->vmbus_subchtq);
}
static void
vmbus_chanmsg_handle(struct vmbus_softc *sc, const struct vmbus_message *msg)
{
vmbus_chanmsg_proc_t msg_proc;
uint32_t msg_type;
msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
if (msg_type >= VMBUS_CHANMSG_TYPE_MAX) {
device_printf(sc->vmbus_dev, "unknown message type 0x%x\n",
msg_type);
return;
}
msg_proc = vmbus_chanmsg_handlers[msg_type];
if (msg_proc != NULL)
msg_proc(sc, msg);
/* Channel specific processing */
vmbus_chan_msgproc(sc, msg);
}
/*
* Task body that consumes messages from the current CPU's
* VMBUS_SINT_MESSAGE slot until the slot reads HYPERV_MSGTYPE_NONE.
* Channel messages are passed to vmbus_chanmsg_handle(); a message of any
* other non-empty type is discarded without processing.  After each
* message the slot is marked empty and, if the pending flag is set, an
* EOM is written so that another message can be delivered.
*
* 'xsc' is the vmbus softc.
*/
static void
vmbus_msg_task(void *xsc, int pending __unused)
{
struct vmbus_softc *sc = xsc;
volatile struct vmbus_message *msg;
/* The slot is accessed through a volatile pointer and re-read each pass. */
msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE;
for (;;) {
if (msg->msg_type == HYPERV_MSGTYPE_NONE) {
/* No message */
break;
} else if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) {
/* Channel message */
vmbus_chanmsg_handle(sc,
__DEVOLATILE(const struct vmbus_message *, msg));
}
/* Mark the slot as free. */
msg->msg_type = HYPERV_MSGTYPE_NONE;
/*
* Make sure the write to msg_type (i.e. set to
* HYPERV_MSGTYPE_NONE) happens before we read the
* msg_flags and EOMing. Otherwise, the EOMing will
* not deliver any more messages since there is no
* empty slot
*
* NOTE:
* mb() is used here, since atomic_thread_fence_seq_cst()
* will become compiler fence on UP kernel.
*/
mb();
if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
/*
* This will cause message queue rescan to possibly
* deliver another msg from the hypervisor
*/
wrmsr(MSR_HV_EOM, 0);
}
}
}
/*
* Interrupt work for one CPU, in three steps:
*  1. if the VMBUS_SINT_TIMER slot holds a timer-expired message, free
*     the slot, call vmbus_et_intr() and EOM if a message is pending;
*  2. run the event processor (sc->vmbus_event_proc);
*  3. if the VMBUS_SINT_MESSAGE slot is not empty, enqueue the per-CPU
*     message task.
*
* 'frame' is passed through to vmbus_et_intr().  Always returns
* FILTER_HANDLED.
*/
static __inline int
vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu)
{
volatile struct vmbus_message *msg;
struct vmbus_message *msg_base;
msg_base = VMBUS_PCPU_GET(sc, message, cpu);
/*
* Check event timer.
*
* TODO: move this to independent IDT vector.
*/
msg = msg_base + VMBUS_SINT_TIMER;
if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED) {
/* The slot is freed before the timer handler is called. */
msg->msg_type = HYPERV_MSGTYPE_NONE;
vmbus_et_intr(frame);
/*
* Make sure the write to msg_type (i.e. set to
* HYPERV_MSGTYPE_NONE) happens before we read the
* msg_flags and EOMing. Otherwise, the EOMing will
* not deliver any more messages since there is no
* empty slot
*
* NOTE:
* mb() is used here, since atomic_thread_fence_seq_cst()
* will become compiler fence on UP kernel.
*/
mb();
if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
/*
* This will cause message queue rescan to possibly
* deliver another msg from the hypervisor
*/
wrmsr(MSR_HV_EOM, 0);
}
}
/*
* Check events. Hot path for network and storage I/O data; high rate.
*
* NOTE:
* As recommended by the Windows guest fellows, we check events before
* checking messages.
*/
sc->vmbus_event_proc(sc, cpu);
/*
* Check messages. Mainly management stuffs; ultra low rate.
*/
msg = msg_base + VMBUS_SINT_MESSAGE;
if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) {
/* The message itself is consumed later by vmbus_msg_task(). */
taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
VMBUS_PCPU_PTR(sc, message_task, cpu));
}
return (FILTER_HANDLED);
}
/*
 * Interrupt entry point: bump the per-CPU interrupt counter and run
 * vmbus_handle_intr1() inside a critical section.
 */
void
vmbus_handle_intr(struct trapframe *trap_frame)
{
	struct vmbus_softc *softc;
	int this_cpu;

	softc = vmbus_get_softc();
	this_cpu = curcpu;

	/* No preemption while the interrupt is being handled. */
	critical_enter();

	/* Interrupt accounting. */
	*VMBUS_PCPU_GET(softc, intr_cnt, this_cpu) += 1;

	(void)vmbus_handle_intr1(softc, trap_frame, this_cpu);

	/* Preemption is allowed again. */
	critical_exit();
}
/*
* Program the SynIC MSRs of the CPU this function runs on (it uses
* curcpu): record the virtual processor id, install the per-CPU message
* and event-flags pages, point the message and timer SINTs at
* sc->vmbus_idtvec, and finally enable the SynIC.  For every MSR the bits
* selected by its *_RSVD_MASK are preserved from the value read back.
*
* 'xsc' is the vmbus softc.
* NOTE(review): presumably run once on every CPU by the caller -- confirm.
*/
static void
vmbus_synic_setup(void *xsc)
{
struct vmbus_softc *sc = xsc;
int cpu = curcpu;
uint64_t val, orig;
uint32_t sint;
if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
/* Save virtual processor id. */
VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX);
} else {
/* Set virtual processor id to 0 for compatibility. */
VMBUS_PCPU_GET(sc, vcpuid, cpu) = 0;
}
/*
* Setup the SynIC message.
*/
orig = rdmsr(MSR_HV_SIMP);
/* The page number of the message page goes into the MSR's page field. */
val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) <<
MSR_HV_SIMP_PGSHIFT);
wrmsr(MSR_HV_SIMP, val);
/*
* Setup the SynIC event flags.
*/
orig = rdmsr(MSR_HV_SIEFP);
val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu)
>> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT);
wrmsr(MSR_HV_SIEFP, val);
/*
* Configure and unmask SINT for message and event flags.
*/
sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
orig = rdmsr(sint);
val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
(orig & MSR_HV_SINT_RSVD_MASK);
wrmsr(sint, val);
/*
* Configure and unmask SINT for timer.
*/
sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
orig = rdmsr(sint);
val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
(orig & MSR_HV_SINT_RSVD_MASK);
wrmsr(sint, val);
/*
* All done; enable SynIC.
*/
orig = rdmsr(MSR_HV_SCONTROL);
val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
wrmsr(MSR_HV_SCONTROL, val);
}
/*
* Undo vmbus_synic_setup() on the CPU this function runs on: disable the
* SynIC, mask the message and timer SINTs, and clear the message and
* event-flags page MSRs down to the bits selected by their *_RSVD_MASK.
*
* 'arg' is not used.
*/
static void
vmbus_synic_teardown(void *arg)
{
uint64_t orig;
uint32_t sint;
/*
* Disable SynIC.
*/
orig = rdmsr(MSR_HV_SCONTROL);
wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
/*
* Mask message and event flags SINT.
*/
sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
orig = rdmsr(sint);
wrmsr(sint, orig | MSR_HV_SINT_MASKED);
/*
* Mask timer SINT.
*/
sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
orig = rdmsr(sint);
wrmsr(sint, orig | MSR_HV_SINT_MASKED);
/*
* Teardown SynIC message.
*/
orig = rdmsr(MSR_HV_SIMP);
wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
/*
* Teardown SynIC event flags.
*/
orig = rdmsr(MSR_HV_SIEFP);
wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
}
/*
 * Allocate the zeroed DMA memory used by the bus: one message page and
 * one event-flags page per CPU, the shared event-flags page (its first
 * half is recorded as the RX flags, its second half as the TX flags)
 * and the two MNF areas.
 *
 * Returns 0 on success or ENOMEM as soon as an allocation fails.
 * Nothing is released here on failure; vmbus_dma_free() frees whatever
 * pointers were recorded.
 */
static int
vmbus_dma_alloc(struct vmbus_softc *sc)
{
	const int dmaflags = BUS_DMA_WAITOK | BUS_DMA_ZERO;
	bus_dma_tag_t dtag;
	uint8_t *flags_page;
	void *page;
	int cpu;

	dtag = bus_get_dma_tag(sc->vmbus_dev);

	/*
	 * Per-cpu messages and event flags.
	 */
	CPU_FOREACH(cpu) {
		page = hyperv_dmamem_alloc(dtag, PAGE_SIZE, 0, PAGE_SIZE,
		    VMBUS_PCPU_PTR(sc, message_dma, cpu), dmaflags);
		if (page == NULL)
			return (ENOMEM);
		VMBUS_PCPU_GET(sc, message, cpu) = page;

		page = hyperv_dmamem_alloc(dtag, PAGE_SIZE, 0, PAGE_SIZE,
		    VMBUS_PCPU_PTR(sc, event_flags_dma, cpu), dmaflags);
		if (page == NULL)
			return (ENOMEM);
		VMBUS_PCPU_GET(sc, event_flags, cpu) = page;
	}

	/* Shared event flags: RX in the first half page, TX in the second. */
	flags_page = hyperv_dmamem_alloc(dtag, PAGE_SIZE, 0, PAGE_SIZE,
	    &sc->vmbus_evtflags_dma, dmaflags);
	if (flags_page == NULL)
		return (ENOMEM);
	sc->vmbus_rx_evtflags = (u_long *)flags_page;
	sc->vmbus_tx_evtflags = (u_long *)(flags_page + (PAGE_SIZE / 2));
	sc->vmbus_evtflags = flags_page;

	sc->vmbus_mnf1 = hyperv_dmamem_alloc(dtag, PAGE_SIZE, 0, PAGE_SIZE,
	    &sc->vmbus_mnf1_dma, dmaflags);
	if (sc->vmbus_mnf1 == NULL)
		return (ENOMEM);

	sc->vmbus_mnf2 = hyperv_dmamem_alloc(dtag, PAGE_SIZE, 0,
	    sizeof(struct vmbus_mnf), &sc->vmbus_mnf2_dma, dmaflags);
	if (sc->vmbus_mnf2 == NULL)
		return (ENOMEM);

	return (0);
}
/*
 * Release the DMA memory recorded by vmbus_dma_alloc().  Each pointer is
 * checked for NULL first and cleared after the free, so this can be
 * called after a partially failed allocation.
 */
static void
vmbus_dma_free(struct vmbus_softc *sc)
{
	int cpu;

	/* Shared event flags; the RX/TX pointers alias the same page. */
	if (sc->vmbus_evtflags != NULL) {
		hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags);
		sc->vmbus_evtflags = NULL;
		sc->vmbus_rx_evtflags = NULL;
		sc->vmbus_tx_evtflags = NULL;
	}

	if (sc->vmbus_mnf1 != NULL) {
		hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1);
		sc->vmbus_mnf1 = NULL;
	}

	if (sc->vmbus_mnf2 != NULL) {
		hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2);
		sc->vmbus_mnf2 = NULL;
	}

	/* Per-cpu message and event-flags pages. */
	CPU_FOREACH(cpu) {
		if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) {
			hyperv_dmamem_free(VMBUS_PCPU_PTR(sc, message_dma, cpu),
			    VMBUS_PCPU_GET(sc, message, cpu));
			VMBUS_PCPU_GET(sc, message, cpu) = NULL;
		}

		if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) {
			hyperv_dmamem_free(
			    VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
			    VMBUS_PCPU_GET(sc, event_flags, cpu));
			VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL;
		}
	}
}
static int
vmbus_intr_setup(struct vmbus_softc *sc)
{
int cpu;
CPU_FOREACH(cpu) {
char buf[MAXCOMLEN + 1];
cpuset_t cpu_mask;
/* Allocate an interrupt counter for Hyper-V interrupt */
snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu);
intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu));
/*
* Setup taskqueue to handle events. Task will be per-
* channel.
*/
VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast(
"hyperv event", M_WAITOK, taskqueue_thread_enqueue,
VMBUS_PCPU_PTR(sc, event_tq, cpu));
if (vmbus_pin_evttask) {
CPU_SETOF(cpu, &cpu_mask);
taskqueue_start_threads_cpuset(
VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET,
&cpu_mask, "hvevent%d", cpu);
} else {
taskqueue_start_threads(
VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET,
"hvevent%d", cpu);
}
/*
* Setup tasks and taskqueues to handle messages.
*/
VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast(
"hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
VMBUS_PCPU_PTR(sc, message_tq, cpu));
CPU_SETOF(cpu, &cpu_mask);
taskqueue_start_threads_cpuset(
VMBUS_PCPU_PTR(sc, message_tq, cpu), 1, PI_NET, &cpu_mask,
"hvmsg%d", cpu);
TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0,
vmbus_msg_task, sc);
}
/*
* All Hyper-V ISR required resources are setup, now let's find a
* free IDT vector for Hyper-V ISR and set it up.
*/
sc->vmbus_idtvec = lapic_ipi_alloc(pti ? IDTVEC(vmbus_isr_pti) :
IDTVEC(vmbus_isr));
if (sc->vmbus_idtvec < 0) {
device_printf(sc->vmbus_dev, "cannot find free IDT vector\n");
return ENXIO;
}
if (bootverbose) {
device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n",
sc->vmbus_idtvec);
}
return 0;
}
/*
 * Undo vmbus_intr_setup(): release the IDT vector if one is held, then
 * free the per-CPU event and message taskqueues.  The message task is
 * drained before its taskqueue is freed.
 */
static void
vmbus_intr_teardown(struct vmbus_softc *sc)
{
	int cpu;

	if (sc->vmbus_idtvec >= 0) {
		lapic_ipi_free(sc->vmbus_idtvec);
		sc->vmbus_idtvec = -1;
	}

	CPU_FOREACH(cpu) {
		if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) {
			taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu));
			VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL;
		}

		if (VMBUS_PCPU_GET(sc, message_tq, cpu) == NULL)
			continue;
		taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
		    VMBUS_PCPU_PTR(sc, message_task, cpu));
		taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu));
		VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL;
	}
}
/*
 * bus_read_ivar method.  No instance variables are provided, so every
 * request fails with ENOENT and '*result' is left untouched.
 */
static int
vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
	const int error = ENOENT;

	return (error);
}
/*
 * bus_child_pnpinfo_str method: append
 * "classid=<type guid> deviceid=<instance guid>" to 'buf' for children
 * that have a channel.  Children without one (the event timer device)
 * leave 'buf' unchanged.  Always returns 0.
 */
static int
vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen)
{
	const struct vmbus_channel *chan = vmbus_get_channel(child);
	char guid[HYPERV_GUID_STRLEN];

	if (chan != NULL) {
		hyperv_guid2str(&chan->ch_guid_type, guid, sizeof(guid));
		strlcat(buf, "classid=", buflen);
		strlcat(buf, guid, buflen);

		hyperv_guid2str(&chan->ch_guid_inst, guid, sizeof(guid));
		strlcat(buf, " deviceid=", buflen);
		strlcat(buf, guid, buflen);
	}

	return (0);
}
/*
 * Create a child device for 'chan' under the vmbus device, store the
 * channel as its ivars and probe/attach it, all under Giant.
 *
 * Returns 0 once the child exists (the result of the probe/attach is
 * not checked) or ENXIO if the child could not be added.
 */
int
vmbus_add_child(struct vmbus_channel *chan)
{
	device_t bus = chan->ch_vmbus->vmbus_dev;

	mtx_lock(&Giant);

	chan->ch_dev = device_add_child(bus, NULL, -1);
	if (chan->ch_dev != NULL) {
		device_set_ivars(chan->ch_dev, chan);
		device_probe_and_attach(chan->ch_dev);
		mtx_unlock(&Giant);
		return (0);
	}

	mtx_unlock(&Giant);
	device_printf(bus, "device_add_child for chan%u failed\n",
	    chan->ch_id);
	return (ENXIO);
}
/*
 * Delete the child device of 'chan', if it has one, under Giant.  The
 * channel's device pointer is cleared whatever device_delete_child()
 * returns.
 *
 * Returns 0 if there was no child, otherwise the result of
 * device_delete_child().
 */
int
vmbus_delete_child(struct vmbus_channel *chan)
{
	int rc;

	mtx_lock(&Giant);
	if (chan->ch_dev == NULL) {
		rc = 0;
	} else {
		rc = device_delete_child(chan->ch_vmbus->vmbus_dev,
		    chan->ch_dev);
		chan->ch_dev = NULL;
	}
	mtx_unlock(&Giant);

	return (rc);
}
static int
vmbus_sysctl_version(SYSCTL_HANDLER_ARGS)
{
struct vmbus_softc *sc = arg1;
char verstr[16];
snprintf(verstr, sizeof(verstr), "%u.%u",
VMBUS_VERSION_MAJOR(sc->vmbus_version),
VMBUS_VERSION_MINOR(sc->vmbus_version));
return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
}
/*
 * bus_alloc_resource method.  It exists so that MMIO resources are
 * allocated from the ranges found in _CRS: with NEW_PCIB, memory
 * requests are served from sc->vmbus_mmio_res.  Every other request is
 * forwarded to the parent bus.
 *
 * No matching release method is needed; bus_generic_release_resource()
 * is used for that.
 */
static struct resource *
vmbus_alloc_resource(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
#ifdef NEW_PCIB
	struct vmbus_softc *sc;

	if (type == SYS_RES_MEMORY) {
		sc = device_get_softc(dev);
		return (pcib_host_res_alloc(&sc->vmbus_mmio_res, child, type,
		    rid, start, end, count, flags));
	}
#endif
	return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags));
}
/*
 * pcib_alloc_msi method: forward the request to the parent of 'bus'.
 */
static int
vmbus_alloc_msi(device_t bus, device_t dev, int count, int maxcount, int *irqs)
{
	device_t parent = device_get_parent(bus);

	return (PCIB_ALLOC_MSI(parent, dev, count, maxcount, irqs));
}
/*
 * pcib_release_msi method: forward the request to the parent of 'bus'.
 */
static int
vmbus_release_msi(device_t bus, device_t dev, int count, int *irqs)
{
	device_t parent = device_get_parent(bus);

	return (PCIB_RELEASE_MSI(parent, dev, count, irqs));
}
/*
 * pcib_alloc_msix method: forward the request to the parent of 'bus'.
 */
static int
vmbus_alloc_msix(device_t bus, device_t dev, int *irq)
{
	device_t parent = device_get_parent(bus);

	return (PCIB_ALLOC_MSIX(parent, dev, irq));
}
/*
 * pcib_release_msix method: forward the request to the parent of 'bus'.
 */
static int
vmbus_release_msix(device_t bus, device_t dev, int irq)
{
	device_t parent = device_get_parent(bus);

	return (PCIB_RELEASE_MSIX(parent, dev, irq));
}
/*
 * pcib_map_msi method: forward the request to the parent of 'bus'.
 */
static int
vmbus_map_msi(device_t bus, device_t dev, int irq, uint64_t *addr,
    uint32_t *data)
{
	device_t parent = device_get_parent(bus);

	return (PCIB_MAP_MSI(parent, dev, irq, addr, data));
}
/*
 * vmbus_get_version method: return the protocol version stored in the
 * softc of 'bus'.
 */
static uint32_t
vmbus_get_version_method(device_t bus, device_t dev)
{
	const struct vmbus_softc *softc;

	softc = device_get_softc(bus);
	return (softc->vmbus_version);
}
/*
 * vmbus_probe_guid method: check whether the channel of 'dev' has the
 * type GUID 'guid'.
 *
 * Returns 0 on a match and ENXIO otherwise.  Children without a channel
 * (vmbus_child_pnpinfo_str() handles the event timer device as such a
 * child) never match; without this guard the NULL channel pointer would
 * be dereferenced.
 */
static int
vmbus_probe_guid_method(device_t bus, device_t dev,
    const struct hyperv_guid *guid)
{
	const struct vmbus_channel *chan = vmbus_get_channel(dev);

	if (chan == NULL) {
		/* Not a channel device; there is no GUID to compare. */
		return (ENXIO);
	}
	if (memcmp(&chan->ch_guid_type, guid, sizeof(struct hyperv_guid)) == 0)
		return (0);
	return (ENXIO);
}
/*
 * vmbus_get_vcpu_id method: return the virtual processor id recorded
 * for 'cpu' in the softc of 'bus'.  'cpu' is not range-checked here.
 */
static uint32_t
vmbus_get_vcpu_id_method(device_t bus, device_t dev, int cpu)
{
	const struct vmbus_softc *softc;

	softc = device_get_softc(bus);
	return (VMBUS_PCPU_GET(softc, vcpuid, cpu));
}
/*
 * vmbus_get_event_taskq method: return the event taskqueue of 'cpu'.
 * 'cpu' must be in the range [0, mp_ncpus); this is asserted.
 */
static struct taskqueue *
vmbus_get_eventtq_method(device_t bus, device_t dev __unused, int cpu)
{
	const struct vmbus_softc *softc;

	softc = device_get_softc(bus);
	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu%d", cpu));

	return (VMBUS_PCPU_GET(softc, event_tq, cpu));
}
#ifdef NEW_PCIB
/* Base address of the vTPM; parse_crs() clips ranges that reach into it. */
#define VTPM_BASE_ADDR 0xfed40000
/* The 4GB boundary that separates the two _CRS parsing passes. */
#define FOUR_GB (1ULL << 32)
/*
* _CRS parsing pass: parse_64 accepts ranges starting at or above 4GB,
* parse_32 accepts ranges starting below 4GB (see parse_crs()).
*/
enum parse_pass { parse_64, parse_32 };
/* Context handed to parse_crs() through AcpiWalkResources(). */
struct parse_context {
/* vmbus device whose softc receives the decoded ranges. */
device_t vmbus_dev;
/* Pass currently being run. */
enum parse_pass pass;
};
/*
 * AcpiWalkResources() callback: record one 32-bit or 64-bit address
 * range from _CRS as MMIO space decoded by the vmbus device.
 *
 * Ranges that end below 1MB are ignored, a range that reaches into the
 * vTPM area is cut short just before VTPM_BASE_ADDR, and a range is only
 * recorded in the pass that matches its start address (below 4GB for
 * parse_32, at or above 4GB for parse_64).
 *
 * 'ctx' is a struct parse_context.  Always returns AE_OK.
 */
static ACPI_STATUS
parse_crs(ACPI_RESOURCE *res, void *ctx)
{
	const struct parse_context *pc = ctx;
	struct vmbus_softc *sc = device_get_softc(pc->vmbus_dev);
	UINT64 start, end;
	int in_pass;

	if (res->Type == ACPI_RESOURCE_TYPE_ADDRESS32) {
		start = res->Data.Address32.Address.Minimum;
		end = res->Data.Address32.Address.Maximum;
	} else if (res->Type == ACPI_RESOURCE_TYPE_ADDRESS64) {
		start = res->Data.Address64.Address.Minimum;
		end = res->Data.Address64.Address.Maximum;
	} else {
		/* Unused types. */
		return (AE_OK);
	}

	/*
	 * We don't use <1MB addresses.
	 */
	if (end < 0x100000)
		return (AE_OK);

	/* Don't conflict with vTPM. */
	if (start < VTPM_BASE_ADDR && end >= VTPM_BASE_ADDR)
		end = VTPM_BASE_ADDR - 1;

	/* Does the range belong to the pass that is currently running? */
	if (pc->pass == parse_32)
		in_pass = (start < FOUR_GB);
	else
		in_pass = (pc->pass == parse_64 && start >= FOUR_GB);

	if (in_pass)
		pcib_host_res_decodes(&sc->vmbus_mmio_res, SYS_RES_MEMORY,
		    start, end, 0);

	return (AE_OK);
}
/*
 * Walk the _CRS object of 'dev' and feed every resource to parse_crs(),
 * which records matching ranges in the softc of 'vmbus_dev' for the
 * given 'pass'.  A failed walk is only reported when bootverbose is set.
 */
static void
vmbus_get_crs(device_t dev, device_t vmbus_dev, enum parse_pass pass)
{
	struct parse_context pctx = {
		.vmbus_dev = vmbus_dev,
		.pass = pass,
	};
	ACPI_STATUS rc;

	if (bootverbose)
		device_printf(dev, "walking _CRS, pass=%d\n", pass);

	rc = AcpiWalkResources(acpi_get_handle(dev), "_CRS", parse_crs, &pctx);
	if (ACPI_FAILURE(rc) && bootverbose)
		device_printf(dev, "_CRS: not found, pass=%d\n", pass);
}
/*
 * Perform one _CRS collection pass for the vmbus device 'dev'.
 *
 * When the grandparent of 'dev' is "acpi0", _CRS is looked up on every
 * attached "vmbus_res" child of acpi0 and then on acpi0 itself; otherwise
 * a diagnostic is printed.  In both cases _CRS is finally looked up on the
 * direct parent of 'dev'.
 */
static void
vmbus_get_mmio_res_pass(device_t dev, enum parse_pass pass)
{
	device_t parent = device_get_parent(dev);
	device_t grandparent = device_get_parent(parent);

	if (strcmp("acpi0", device_get_nameunit(grandparent)) != 0) {
		device_printf(dev, "not grandchild of acpi\n");
	} else {
		device_t *kids;
		int nkids;

		/* Try to locate VMBUS resources and find _CRS on them. */
		if (device_get_children(grandparent, &kids, &nkids) == 0) {
			int idx;

			for (idx = 0; idx < nkids; idx++) {
				device_t kid = kids[idx];

				if (device_is_attached(kid) &&
				    strcmp("vmbus_res",
				    device_get_name(kid)) == 0)
					vmbus_get_crs(kid, dev, pass);
			}
			free(kids, M_TEMP);
		}

		/* Try to find _CRS on acpi. */
		vmbus_get_crs(grandparent, dev, pass);
	}

	/* Try to find _CRS on parent. */
	vmbus_get_crs(parent, dev, pass);
}
/*
 * Initialize the MMIO resource bookkeeping of 'dev' and populate it from
 * the ACPI _CRS objects.
 */
static void
vmbus_get_mmio_res(device_t dev)
{
	/*
	 * The resources are walked twice, 64-bit pass first, so that the
	 * 32-bit ranges end up behind the 64-bit ranges in the resource
	 * list.  NB: resource_list_add() uses INSERT_TAIL.  When
	 * vmbus_alloc_resource() later iterates through the list to find a
	 * range for a 64-bit BAR, the >4GB ranges are therefore tried first.
	 */
	static const enum parse_pass pass_order[] = { parse_64, parse_32 };
	struct vmbus_softc *softc = device_get_softc(dev);
	size_t i;

	pcib_host_res_init(dev, &softc->vmbus_mmio_res);
	for (i = 0; i < sizeof(pass_order) / sizeof(pass_order[0]); i++)
		vmbus_get_mmio_res_pass(dev, pass_order[i]);
}
/*
 * Release the MMIO resource bookkeeping that vmbus_get_mmio_res() set up
 * in the softc of 'dev'.
 */
static void
vmbus_free_mmio_res(device_t dev)
{
	struct vmbus_softc *softc;

	softc = device_get_softc(dev);
	pcib_host_res_free(dev, &softc->vmbus_mmio_res);
}
#endif /* NEW_PCIB */
/*
 * Identify method: add a "vmbus" child to 'parent', but only under unit 0
 * of the parent, only when running as a Hyper-V guest, and only when the
 * hypervisor advertises the SynIC MSR feature.
 */
static void
vmbus_identify(driver_t *driver, device_t parent)
{
	if (device_get_unit(parent) != 0)
		return;
	if (vm_guest != VM_GUEST_HV)
		return;
	if ((hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
		return;

	device_add_child(parent, "vmbus", -1);
}
/*
 * Probe method: accept only unit 0, on a Hyper-V guest whose hypervisor
 * advertises the SynIC MSR feature.
 *
 * Returns BUS_PROBE_DEFAULT (after setting the device description) on a
 * match and ENXIO otherwise.
 */
static int
vmbus_probe(device_t dev)
{
	if (device_get_unit(dev) == 0 && vm_guest == VM_GUEST_HV &&
	    (hyperv_features & CPUID_HV_MSR_SYNIC) != 0) {
		device_set_desc(dev, "Hyper-V Vmbus");
		return (BUS_PROBE_DEFAULT);
	}
	return (ENXIO);
}
/**
 * @brief Main vmbus driver initialization routine.
 *
 * Runs at most once per softc (guarded by VMBUS_FLAG_ATTACHED).  Here, we
 * - collect the MMIO ranges from ACPI (NEW_PCIB only)
 * - initialize the channel lists, their locks and the channel map
 * - create the transaction context for "post message" Hypercalls
 * - allocate the DMA memory and set up the interrupt
 * - set up the SynIC on all CPUs
 * - initialize vmbus (connect to the Hypervisor) and select the event
 *   processing routine that matches the negotiated version
 * - scan for channel offers
 * - publish the "version" sysctl
 *
 * @return 0 on success (or if already attached), otherwise the error of
 *     the step that failed; everything set up so far is torn down again.
 */
static int
vmbus_doattach(struct vmbus_softc *sc)
{
	struct sysctl_oid_list *child;
	struct sysctl_ctx_list *ctx;
	int ret;

	if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED)
		return (0);

#ifdef NEW_PCIB
	vmbus_get_mmio_res(sc->vmbus_dev);
#endif

	sc->vmbus_flags |= VMBUS_FLAG_ATTACHED;

	sc->vmbus_gpadl = VMBUS_GPADL_START;
	mtx_init(&sc->vmbus_prichan_lock, "vmbus prichan", NULL, MTX_DEF);
	TAILQ_INIT(&sc->vmbus_prichans);
	mtx_init(&sc->vmbus_chan_lock, "vmbus channel", NULL, MTX_DEF);
	TAILQ_INIT(&sc->vmbus_chans);
	sc->vmbus_chmap = malloc(
	    sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX, M_DEVBUF,
	    M_WAITOK | M_ZERO);

	/*
	 * Create context for "post message" Hypercalls
	 */
	sc->vmbus_xc = vmbus_xact_ctx_create(bus_get_dma_tag(sc->vmbus_dev),
	    HYPERCALL_POSTMSGIN_SIZE, VMBUS_MSG_SIZE,
	    sizeof(struct vmbus_msghc));
	if (sc->vmbus_xc == NULL) {
		ret = ENXIO;
		goto cleanup;
	}

	/*
	 * Allocate DMA stuffs.
	 */
	ret = vmbus_dma_alloc(sc);
	if (ret != 0)
		goto cleanup;

	/*
	 * Setup interrupt.
	 */
	ret = vmbus_intr_setup(sc);
	if (ret != 0)
		goto cleanup;

	/*
	 * Setup SynIC.
	 */
	if (bootverbose)
		device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started);
	smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc);
	sc->vmbus_flags |= VMBUS_FLAG_SYNIC;

	/*
	 * Initialize vmbus, e.g. connect to Hypervisor.
	 */
	ret = vmbus_init(sc);
	if (ret != 0)
		goto cleanup;

	if (sc->vmbus_version == VMBUS_VERSION_WS2008 ||
	    sc->vmbus_version == VMBUS_VERSION_WIN7)
		sc->vmbus_event_proc = vmbus_event_proc_compat;
	else
		sc->vmbus_event_proc = vmbus_event_proc;

	ret = vmbus_scan(sc);
	if (ret != 0)
		goto cleanup;

	ctx = device_get_sysctl_ctx(sc->vmbus_dev);
	child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->vmbus_dev));
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "version",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    vmbus_sysctl_version, "A", "vmbus version");

	return (ret);

cleanup:
	vmbus_scan_teardown(sc);
	/*
	 * If the SynIC was already set up, disable it before the interrupt
	 * and the DMA memory it was programmed with are released below;
	 * this mirrors the order used by vmbus_detach().
	 */
	if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) {
		sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC;
		smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL);
	}
	vmbus_intr_teardown(sc);
	vmbus_dma_free(sc);
	if (sc->vmbus_xc != NULL) {
		vmbus_xact_ctx_destroy(sc->vmbus_xc);
		sc->vmbus_xc = NULL;
	}
	free(__DEVOLATILE(void *, sc->vmbus_chmap), M_DEVBUF);
	/* Do not leave a dangling pointer behind for vmbus_detach(). */
	sc->vmbus_chmap = NULL;
	mtx_destroy(&sc->vmbus_prichan_lock);
	mtx_destroy(&sc->vmbus_chan_lock);

	return (ret);
}
/*
 * Event processing routine that deliberately does nothing.  vmbus_attach()
 * installs it as the initial sc->vmbus_event_proc; vmbus_doattach()
 * replaces it once vmbus_init() has succeeded.
 */
static void
vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
{
}
#ifdef EARLY_AP_STARTUP
static void
vmbus_intrhook(void *xsc)
{
struct vmbus_softc *sc = xsc;
if (bootverbose)
device_printf(sc->vmbus_dev, "intrhook\n");
vmbus_doattach(sc);
config_intrhook_disestablish(&sc->vmbus_intrhook);
}
#endif /* EARLY_AP_STARTUP */
/*
 * Attach method: record the softc globally, give it safe defaults, and
 * either defer or directly run the real initialization in
 * vmbus_doattach().  Always returns 0.
 */
static int
vmbus_attach(device_t dev)
{
	struct vmbus_softc *sc;

	sc = device_get_softc(dev);
	vmbus_sc = sc;
	sc->vmbus_dev = dev;
	sc->vmbus_idtvec = -1;

	/*
	 * Start with the no-op event handler.  The real event processing
	 * logic is configured after the vmbus protocol version negotiation
	 * and before channel offers are requested.
	 */
	sc->vmbus_event_proc = vmbus_event_proc_dummy;

#ifdef EARLY_AP_STARTUP
	/*
	 * Defer the real attach until the pause(9) works as expected.
	 */
	sc->vmbus_intrhook.ich_func = vmbus_intrhook;
	sc->vmbus_intrhook.ich_arg = sc;
	config_intrhook_establish(&sc->vmbus_intrhook);
#else	/* !EARLY_AP_STARTUP */
	/*
	 * A zero 'cold' indicates that the system has already booted and
	 * thread scheduling is possible; in that case the driver
	 * initialization is called directly.
	 */
	if (cold == 0)
		vmbus_doattach(sc);
#endif	/* EARLY_AP_STARTUP */

	return (0);
}
/*
 * Detach method: undo what vmbus_doattach() set up.  Always returns 0.
 *
 * NOTE(review): the mutexes are initialized only in vmbus_doattach(), and
 * its failure path already destroys them; this routine destroys them
 * unconditionally.  Confirm that detach cannot run when the attach never
 * ran or has failed.
 */
static int
vmbus_detach(device_t dev)
{
struct vmbus_softc *sc = device_get_softc(dev);
/* NOTE(review): the result of bus_generic_detach() is not checked. */
bus_generic_detach(dev);
vmbus_chan_destroy_all(sc);
vmbus_scan_teardown(sc);
vmbus_disconnect(sc);
/* Tear the SynIC down only if vmbus_doattach() got as far as setting it up. */
if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) {
sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC;
smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL);
}
/* Release the interrupt and the DMA memory after the SynIC teardown. */
vmbus_intr_teardown(sc);
vmbus_dma_free(sc);
/* Destroy the "post message" transaction context, if one was created. */
if (sc->vmbus_xc != NULL) {
vmbus_xact_ctx_destroy(sc->vmbus_xc);
sc->vmbus_xc = NULL;
}
/* Free the channel map and destroy the channel list locks. */
free(__DEVOLATILE(void *, sc->vmbus_chmap), M_DEVBUF);
mtx_destroy(&sc->vmbus_prichan_lock);
mtx_destroy(&sc->vmbus_chan_lock);
#ifdef NEW_PCIB
/* Drop the MMIO ranges gathered by vmbus_get_mmio_res(). */
vmbus_free_mmio_res(dev);
#endif
return (0);
}
#ifndef EARLY_AP_STARTUP
/*
 * SYSINIT hook used when EARLY_AP_STARTUP is not defined: run the real
 * attach for the recorded vmbus softc.  Nothing happens unless this is a
 * Hyper-V guest, a softc has been recorded, and 'cold' is zero.
 */
static void
vmbus_sysinit(void *arg __unused)
{
	struct vmbus_softc *softc = vmbus_get_softc();

	/*
	 * A zero 'cold' indicates that the system has already booted and
	 * thread scheduling is possible; only then is the driver
	 * initialization called directly.
	 */
	if (vm_guest == VM_GUEST_HV && softc != NULL && cold == 0)
		vmbus_doattach(softc);
}
/*
* NOTE:
* We have to start as the last step of SI_SUB_SMP, i.e. after SMP is
* initialized.
*/
SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL);
#endif /* !EARLY_AP_STARTUP */