/*-
 * Copyright (c) 2009-2012,2016 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * Copyright (c) 2012 Citrix Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * VM Bus Driver Implementation
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/rtprio.h>
#include <sys/interrupt.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>
#include <sys/mutex.h>
#include <sys/smp.h>

#include <machine/resource.h>
#include <sys/rman.h>

#include <machine/stdarg.h>
#include <machine/intr_machdep.h>
#include <machine/md_var.h>
#include <machine/segments.h>
#include <sys/pcpu.h>
#include <x86/apicvar.h>

#include <dev/hyperv/include/hyperv.h>
#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
#include <dev/hyperv/vmbus/hyperv_reg.h>
#include <dev/hyperv/vmbus/hyperv_var.h>
#include <dev/hyperv/vmbus/vmbus_reg.h>
#include <dev/hyperv/vmbus/vmbus_var.h>

#include <contrib/dev/acpica/include/acpi.h>
#include "acpi_if.h"

struct vmbus_softc *vmbus_sc;

extern inthand_t IDTVEC(vmbus_isr);
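
/*
 * vmbus_sc is the driver-wide softc; it is set once in vmbus_attach() and
 * read through vmbus_get_softc() elsewhere, including the interrupt path
 * below.  IDTVEC(vmbus_isr) is the MD interrupt-vector entry point that
 * the SynIC SINTs are programmed to raise; see vmbus_synic_setup() and
 * vmbus_intr_setup().
 */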

static void
vmbus_msg_task(void *xsc, int pending __unused)
{
	struct vmbus_softc *sc = xsc;
	volatile struct vmbus_message *msg;

	msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE;
	for (;;) {
		if (msg->msg_type == VMBUS_MSGTYPE_NONE) {
			/* No message */
			break;
		} else if (msg->msg_type == VMBUS_MSGTYPE_CHANNEL) {
			/* Channel message */
			vmbus_chan_msgproc(sc,
			    __DEVOLATILE(const struct vmbus_message *, msg));
		}

		msg->msg_type = VMBUS_MSGTYPE_NONE;
		/*
		 * Make sure the write to msg_type (i.e. set to
		 * VMBUS_MSGTYPE_NONE) happens before we read the
		 * msg_flags and EOMing.  Otherwise, the EOMing will
		 * not deliver any more messages since there is no
		 * empty slot.
		 *
		 * NOTE:
		 * mb() is used here, since atomic_thread_fence_seq_cst()
		 * will become a compiler fence on a UP kernel.
		 */
		mb();
		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
			/*
			 * This will cause the message queue rescan to
			 * possibly deliver another msg from the hypervisor.
			 */
			wrmsr(MSR_HV_EOM, 0);
		}
	}
}

static __inline int
vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu)
{
	volatile struct vmbus_message *msg;
	struct vmbus_message *msg_base;

	msg_base = VMBUS_PCPU_GET(sc, message, cpu);

	/*
	 * Check event timer.
	 *
	 * TODO: move this to an independent IDT vector.
	 */
	msg = msg_base + VMBUS_SINT_TIMER;
	if (msg->msg_type == VMBUS_MSGTYPE_TIMER_EXPIRED) {
		msg->msg_type = VMBUS_MSGTYPE_NONE;

		vmbus_et_intr(frame);

		/*
		 * Make sure the write to msg_type (i.e. set to
		 * VMBUS_MSGTYPE_NONE) happens before we read the
		 * msg_flags and EOMing.  Otherwise, the EOMing will
		 * not deliver any more messages since there is no
		 * empty slot.
		 *
		 * NOTE:
		 * mb() is used here, since atomic_thread_fence_seq_cst()
		 * will become a compiler fence on a UP kernel.
		 */
		mb();
		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
			/*
			 * This will cause the message queue rescan to
			 * possibly deliver another msg from the hypervisor.
			 */
			wrmsr(MSR_HV_EOM, 0);
		}
	}

	/*
	 * Check events.  Hot path for network and storage I/O data; high rate.
	 *
	 * NOTE:
	 * As recommended by the Windows guest fellows, we check events before
	 * checking messages.
	 */
	sc->vmbus_event_proc(sc, cpu);

	/*
	 * Check messages.  Mainly management stuff; ultra low rate.
	 */
	msg = msg_base + VMBUS_SINT_MESSAGE;
	if (__predict_false(msg->msg_type != VMBUS_MSGTYPE_NONE)) {
		taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
		    VMBUS_PCPU_PTR(sc, message_task, cpu));
	}

	return (FILTER_HANDLED);
}
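
/*
 * Interrupt flow sketch (a summary of the code above and below; the trap
 * glue itself lives in the MD assembly behind IDTVEC(vmbus_isr)): the
 * hypervisor raises the vector programmed into the SINTs, the ISR lands
 * in vmbus_handle_intr(), which handles timer messages and events inline
 * and defers channel/management messages to the per-CPU message_task,
 * i.e. vmbus_msg_task().
 */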

void
vmbus_handle_intr(struct trapframe *trap_frame)
{
	struct vmbus_softc *sc = vmbus_get_softc();
	int cpu = curcpu;

	/*
	 * Disable preemption.
	 */
	critical_enter();

	/*
	 * Do a little interrupt counting.
	 */
	(*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++;

	vmbus_handle_intr1(sc, trap_frame, cpu);

	/*
	 * Enable preemption.
	 */
	critical_exit();
}

static void
vmbus_synic_setup(void *xsc)
{
	struct vmbus_softc *sc = xsc;
	int cpu = curcpu;
	uint64_t val, orig;
	uint32_t sint;

	if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
		/*
		 * Save virtual processor id.
		 */
		VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX);
	} else {
		/*
		 * XXX
		 * Virtual processor id is only used by a pretty broken
		 * channel selection code from storvsc.  It's nothing
		 * critical even if CPUID_HV_MSR_VP_INDEX is not set; keep
		 * moving on.
		 */
		VMBUS_PCPU_GET(sc, vcpuid, cpu) = cpu;
	}

	/*
	 * Setup the SynIC message.
	 */
	orig = rdmsr(MSR_HV_SIMP);
	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
	    ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) <<
	     MSR_HV_SIMP_PGSHIFT);
	wrmsr(MSR_HV_SIMP, val);

	/*
	 * Setup the SynIC event flags.
	 */
	orig = rdmsr(MSR_HV_SIEFP);
	val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
	    ((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu)
	      >> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT);
	wrmsr(MSR_HV_SIEFP, val);

	/*
	 * Configure and unmask SINT for message and event flags.
	 */
	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
	orig = rdmsr(sint);
	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
	    (orig & MSR_HV_SINT_RSVD_MASK);
	wrmsr(sint, val);

	/*
	 * Configure and unmask SINT for timer.
	 */
	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
	orig = rdmsr(sint);
	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
	    (orig & MSR_HV_SINT_RSVD_MASK);
	wrmsr(sint, val);

	/*
	 * All done; enable SynIC.
	 */
	orig = rdmsr(MSR_HV_SCONTROL);
	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
	wrmsr(MSR_HV_SCONTROL, val);
}
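
/*
 * MSR layout sketch, as implied by the masks and shifts above (the
 * authoritative definitions are in hyperv_reg.h and the Hyper-V TLFS):
 * SIMP/SIEFP carry an enable bit plus the page frame number of the
 * page-aligned backing page, while each SINTx register carries the IDT
 * vector, an auto-EOI flag, and a mask bit.
 */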

static void
vmbus_synic_teardown(void *arg)
{
	uint64_t orig;
	uint32_t sint;

	/*
	 * Disable SynIC.
	 */
	orig = rdmsr(MSR_HV_SCONTROL);
	wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));

	/*
	 * Mask message and event flags SINT.
	 */
	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
	orig = rdmsr(sint);
	wrmsr(sint, orig | MSR_HV_SINT_MASKED);

	/*
	 * Mask timer SINT.
	 */
	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
	orig = rdmsr(sint);
	wrmsr(sint, orig | MSR_HV_SINT_MASKED);

	/*
	 * Teardown SynIC message.
	 */
	orig = rdmsr(MSR_HV_SIMP);
	wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));

	/*
	 * Teardown SynIC event flags.
	 */
	orig = rdmsr(MSR_HV_SIEFP);
	wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
}

static int
vmbus_dma_alloc(struct vmbus_softc *sc)
{
	int cpu;

	CPU_FOREACH(cpu) {
		void *ptr;

		/*
		 * Per-cpu messages and event flags.
		 */
		ptr = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
		    PAGE_SIZE, 0, PAGE_SIZE,
		    VMBUS_PCPU_PTR(sc, message_dma, cpu),
		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
		if (ptr == NULL)
			return ENOMEM;
		VMBUS_PCPU_GET(sc, message, cpu) = ptr;

		ptr = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
		    PAGE_SIZE, 0, PAGE_SIZE,
		    VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
		if (ptr == NULL)
			return ENOMEM;
		VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr;
	}
	return 0;
}
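
/*
 * Both buffers are allocated page-sized and page-aligned because
 * vmbus_synic_setup() programs only their page frame numbers into the
 * SIMP/SIEFP MSRs; the hypervisor then delivers messages and event flags
 * directly into these per-CPU pages.
 */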

static void
vmbus_dma_free(struct vmbus_softc *sc)
{
	int cpu;

	CPU_FOREACH(cpu) {
		if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) {
			hyperv_dmamem_free(
			    VMBUS_PCPU_PTR(sc, message_dma, cpu),
			    VMBUS_PCPU_GET(sc, message, cpu));
			VMBUS_PCPU_GET(sc, message, cpu) = NULL;
		}
		if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) {
			hyperv_dmamem_free(
			    VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
			    VMBUS_PCPU_GET(sc, event_flags, cpu));
			VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL;
		}
	}
}

static int
vmbus_intr_setup(struct vmbus_softc *sc)
{
	int cpu;

	CPU_FOREACH(cpu) {
		char buf[MAXCOMLEN + 1];
		cpuset_t cpu_mask;

		/* Allocate an interrupt counter for Hyper-V interrupt */
		snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu);
		intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu));

		/*
		 * Setup taskqueue to handle events.  Tasks will be
		 * per-channel.
		 */
		VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast(
		    "hyperv event", M_WAITOK, taskqueue_thread_enqueue,
		    VMBUS_PCPU_PTR(sc, event_tq, cpu));
		CPU_SETOF(cpu, &cpu_mask);
		taskqueue_start_threads_cpuset(
		    VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET, &cpu_mask,
		    "hvevent%d", cpu);

		/*
		 * Setup tasks and taskqueues to handle messages.
		 */
		VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast(
		    "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
		    VMBUS_PCPU_PTR(sc, message_tq, cpu));
		CPU_SETOF(cpu, &cpu_mask);
		taskqueue_start_threads_cpuset(
		    VMBUS_PCPU_PTR(sc, message_tq, cpu), 1, PI_NET, &cpu_mask,
		    "hvmsg%d", cpu);
		TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0,
		    vmbus_msg_task, sc);
	}

	/*
	 * All Hyper-V ISR required resources are set up; now let's find a
	 * free IDT vector for the Hyper-V ISR and set it up.
	 */
	sc->vmbus_idtvec = lapic_ipi_alloc(IDTVEC(vmbus_isr));
	if (sc->vmbus_idtvec < 0) {
		device_printf(sc->vmbus_dev, "cannot find free IDT vector\n");
		return ENXIO;
	}
	if (bootverbose) {
		device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n",
		    sc->vmbus_idtvec);
	}
	return 0;
}
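
/*
 * Design note: each CPU gets its own event and message taskqueues, whose
 * single worker thread is pinned to that CPU via the cpuset mask above.
 * A message or event raised on a CPU is therefore always processed on
 * that same CPU, which keeps the per-CPU SynIC pages and counters free
 * of cross-CPU sharing.
 */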

static void
vmbus_intr_teardown(struct vmbus_softc *sc)
{
	int cpu;

	if (sc->vmbus_idtvec >= 0) {
		lapic_ipi_free(sc->vmbus_idtvec);
		sc->vmbus_idtvec = -1;
	}

	CPU_FOREACH(cpu) {
		if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) {
			taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu));
			VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL;
		}
		if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) {
			taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
			    VMBUS_PCPU_PTR(sc, message_task, cpu));
			taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu));
			VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL;
		}
	}
}

static int
vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
	struct hv_device *child_dev_ctx = device_get_ivars(child);

	switch (index) {
	case HV_VMBUS_IVAR_TYPE:
		*result = (uintptr_t)&child_dev_ctx->class_id;
		return (0);

	case HV_VMBUS_IVAR_INSTANCE:
		*result = (uintptr_t)&child_dev_ctx->device_id;
		return (0);

	case HV_VMBUS_IVAR_DEVCTX:
		*result = (uintptr_t)child_dev_ctx;
		return (0);

	case HV_VMBUS_IVAR_NODE:
		*result = (uintptr_t)child_dev_ctx->device;
		return (0);
	}
	return (ENOENT);
}

static int
vmbus_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
{
	switch (index) {
	case HV_VMBUS_IVAR_TYPE:
	case HV_VMBUS_IVAR_INSTANCE:
	case HV_VMBUS_IVAR_DEVCTX:
	case HV_VMBUS_IVAR_NODE:
		/* read-only */
		return (EINVAL);
	}
	return (ENOENT);
}

static int
vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen)
{
	struct hv_device *dev_ctx = device_get_ivars(child);
	char guidbuf[HYPERV_GUID_STRLEN];

	if (dev_ctx == NULL)
		return (0);

	strlcat(buf, "classid=", buflen);
	hyperv_guid2str(&dev_ctx->class_id, guidbuf, sizeof(guidbuf));
	strlcat(buf, guidbuf, buflen);

	strlcat(buf, " deviceid=", buflen);
	hyperv_guid2str(&dev_ctx->device_id, guidbuf, sizeof(guidbuf));
	strlcat(buf, guidbuf, buflen);

	return (0);
}
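
/*
 * The pnpinfo string built above has the form
 *	classid=<guid> deviceid=<guid>
 * with both GUIDs rendered by hyperv_guid2str().
 */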

struct hv_device *
hv_vmbus_child_device_create(hv_guid type, hv_guid instance,
    hv_vmbus_channel *channel)
{
	hv_device *child_dev;

	/*
	 * Allocate the new child device.
	 */
	child_dev = malloc(sizeof(hv_device), M_DEVBUF, M_WAITOK | M_ZERO);

	child_dev->channel = channel;
	memcpy(&child_dev->class_id, &type, sizeof(hv_guid));
	memcpy(&child_dev->device_id, &instance, sizeof(hv_guid));

	return (child_dev);
}
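
/*
 * Typical usage sketch (the actual call sites live in the channel-offer
 * handling elsewhere in the vmbus code):
 *
 *	struct hv_device *dev;
 *
 *	dev = hv_vmbus_child_device_create(type, instance, channel);
 *	hv_vmbus_child_device_register(dev);
 */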

int
hv_vmbus_child_device_register(struct hv_device *child_dev)
{
	device_t child, parent;

	parent = vmbus_get_device();
	if (bootverbose) {
		char name[HYPERV_GUID_STRLEN];

		hyperv_guid2str(&child_dev->class_id, name, sizeof(name));
		device_printf(parent, "add device, classid: %s\n", name);
	}

	child = device_add_child(parent, NULL, -1);
	child_dev->device = child;
	device_set_ivars(child, child_dev);

	return (0);
}

int
hv_vmbus_child_device_unregister(struct hv_device *child_dev)
{
	int ret = 0;

	/*
	 * XXXKYS: Ensure that this is the opposite of
	 * device_add_child()
	 */
	mtx_lock(&Giant);
	ret = device_delete_child(vmbus_get_device(), child_dev->device);
	mtx_unlock(&Giant);
	return (ret);
}

static int
vmbus_probe(device_t dev)
{
	char *id[] = { "VMBUS", NULL };

	if (ACPI_ID_PROBE(device_get_parent(dev), dev, id) == NULL ||
	    device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV ||
	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
		return (ENXIO);

	device_set_desc(dev, "Hyper-V Vmbus");

	return (BUS_PROBE_DEFAULT);
}

/**
 * @brief Main vmbus driver initialization routine.
 *
 * Here, we
 * - allocate the per-cpu DMA resources
 * - setup the per-cpu interrupt resources and the IDT vector
 * - setup the SynIC on all cpus
 * - connect to VMBus in the root partition
 * - retrieve the channel offers and scan for devices
 */
static int
vmbus_bus_init(void)
{
	struct vmbus_softc *sc = vmbus_get_softc();
	int ret;

	if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED)
		return (0);
	sc->vmbus_flags |= VMBUS_FLAG_ATTACHED;

	/*
	 * Allocate DMA stuff.
	 */
	ret = vmbus_dma_alloc(sc);
	if (ret != 0)
		goto cleanup;

	/*
	 * Setup interrupt.
	 */
	ret = vmbus_intr_setup(sc);
	if (ret != 0)
		goto cleanup;

	/*
	 * Setup SynIC.
	 */
	if (bootverbose)
		device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started);
	smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc);
	sc->vmbus_flags |= VMBUS_FLAG_SYNIC;

	/*
	 * Connect to VMBus in the root partition.
	 */
	ret = hv_vmbus_connect();
	if (ret != 0)
		goto cleanup;

	if (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008 ||
	    hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)
		sc->vmbus_event_proc = vmbus_event_proc_compat;
	else
		sc->vmbus_event_proc = vmbus_event_proc;

	hv_vmbus_request_channel_offers();

	vmbus_scan();
	bus_generic_attach(sc->vmbus_dev);
	device_printf(sc->vmbus_dev, "device scan, probe and attach done\n");

	return (ret);

cleanup:
	vmbus_intr_teardown(sc);
	vmbus_dma_free(sc);

	return (ret);
}

static void
vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
{
}

static int
vmbus_attach(device_t dev)
{
	vmbus_sc = device_get_softc(dev);
	vmbus_sc->vmbus_dev = dev;
	vmbus_sc->vmbus_idtvec = -1;

	/*
	 * Event processing logic will be configured:
	 * - After the vmbus protocol version negotiation.
	 * - Before we request channel offers.
	 */
	vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy;

#ifndef EARLY_AP_STARTUP
	/*
	 * If the system has already booted and thread
	 * scheduling is possible, as indicated by the
	 * global cold set to zero, we just call the driver
	 * initialization directly.
	 */
	if (!cold)
#endif
		vmbus_bus_init();

	bus_generic_probe(dev);
	return (0);
}

static void
vmbus_sysinit(void *arg __unused)
{
	if (vm_guest != VM_GUEST_HV || vmbus_get_softc() == NULL)
		return;

#ifndef EARLY_AP_STARTUP
	/*
	 * If the system has already booted and thread
	 * scheduling is possible, as indicated by the
	 * global cold set to zero, we just call the driver
	 * initialization directly.
	 */
	if (!cold)
#endif
		vmbus_bus_init();
}

static int
vmbus_detach(device_t dev)
{
	struct vmbus_softc *sc = device_get_softc(dev);

	hv_vmbus_release_unattached_channels();
	hv_vmbus_disconnect();

	if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) {
		sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC;
		smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL);
	}

	vmbus_intr_teardown(sc);
	vmbus_dma_free(sc);

	return (0);
}

static device_method_t vmbus_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,			vmbus_probe),
	DEVMETHOD(device_attach,		vmbus_attach),
	DEVMETHOD(device_detach,		vmbus_detach),
	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
	DEVMETHOD(device_suspend,		bus_generic_suspend),
	DEVMETHOD(device_resume,		bus_generic_resume),

	/* Bus interface */
	DEVMETHOD(bus_add_child,		bus_generic_add_child),
	DEVMETHOD(bus_print_child,		bus_generic_print_child),
	DEVMETHOD(bus_read_ivar,		vmbus_read_ivar),
	DEVMETHOD(bus_write_ivar,		vmbus_write_ivar),
	DEVMETHOD(bus_child_pnpinfo_str,	vmbus_child_pnpinfo_str),

	DEVMETHOD_END
};

static driver_t vmbus_driver = {
	"vmbus",
	vmbus_methods,
	sizeof(struct vmbus_softc)
};

static devclass_t vmbus_devclass;

DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL);
MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
MODULE_VERSION(vmbus, 1);

#ifndef EARLY_AP_STARTUP
/*
 * NOTE:
 * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is
 * initialized.
 */
SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL);
#endif