freebsd-dev/sys/x86/acpica/acpi_wakeup.c

485 lines
12 KiB
C
Raw Normal View History

/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2001 Takanori Watanabe <takawata@jp.freebsd.org>
* Copyright (c) 2001-2012 Mitsuru IWASAKI <iwasaki@jp.freebsd.org>
* Copyright (c) 2003 Peter Wemm
* Copyright (c) 2008-2012 Jung-uk Kim <jkim@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#if defined(__amd64__)
#define DEV_APIC
#else
#include "opt_apic.h"
#endif
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/smp.h>
2013-12-24 14:48:52 +00:00
#include <sys/systm.h>
#include <sys/cons.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/intr_machdep.h>
#include <machine/md_var.h>
#include <x86/mca.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
#include <x86/ucode.h>
#ifdef DEV_APIC
#include <x86/apicreg.h>
#include <x86/apicvar.h>
#endif
#ifdef SMP
#include <machine/smp.h>
#include <machine/vmparam.h>
#endif
2009-06-05 18:44:36 +00:00
#include <contrib/dev/acpica/include/acpi.h>
#include <dev/acpica/acpivar.h>
#include "acpi_wakecode.h"
#include "acpi_wakedata.h"
/* Make sure the code is less than a page and leave room for the stack. */
CTASSERT(sizeof(wakecode) < PAGE_SIZE - 1024);
extern int acpi_resume_beep;
extern int acpi_reset_video;
extern int acpi_susp_bounce;
#ifdef SMP
extern struct susppcb **susppcbs;
static cpuset_t suspcpus;
#else
static struct susppcb **susppcbs;
#endif
static void *acpi_alloc_wakeup_handler(void **);
static void acpi_stop_beep(void *);
#ifdef SMP
static int acpi_wakeup_ap(struct acpi_softc *, int);
static void acpi_wakeup_cpus(struct acpi_softc *);
#endif
#ifdef __amd64__
#define ACPI_WAKEPAGES 4
#else
#define ACPI_WAKEPAGES 1
#endif
#define WAKECODE_FIXUP(offset, type, val) do { \
type *addr; \
addr = (type *)(sc->acpi_wakeaddr + (offset)); \
*addr = val; \
} while (0)
static void
acpi_stop_beep(void *arg)
{
if (acpi_resume_beep != 0)
timer_spkr_release();
}
#ifdef SMP
static int
acpi_wakeup_ap(struct acpi_softc *sc, int cpu)
{
struct pcb *pcb;
int vector = (sc->acpi_wakephys >> 12) & 0xff;
int apic_id = cpu_apic_ids[cpu];
int ms;
pcb = &susppcbs[cpu]->sp_pcb;
WAKECODE_FIXUP(wakeup_pcb, struct pcb *, pcb);
WAKECODE_FIXUP(wakeup_gdt, uint16_t, pcb->pcb_gdt.rd_limit);
WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t, pcb->pcb_gdt.rd_base);
ipi_startup(apic_id, vector);
/* Wait up to 5 seconds for it to resume. */
for (ms = 0; ms < 5000; ms++) {
if (!CPU_ISSET(cpu, &suspended_cpus))
return (1); /* return SUCCESS */
DELAY(1000);
}
return (0); /* return FAILURE */
}
#define WARMBOOT_TARGET 0
i386 4/4G split. The change makes the user and kernel address spaces on i386 independent, giving each almost the full 4G of usable virtual addresses except for one PDE at top used for trampoline and per-CPU trampoline stacks, and system structures that must be always mapped, namely IDT, GDT, common TSS and LDT, and process-private TSS and LDT if allocated. By using 1:1 mapping for the kernel text and data, it appeared possible to eliminate assembler part of the locore.S which bootstraps initial page table and KPTmap. The code is rewritten in C and moved into the pmap_cold(). The comment in vmparam.h explains the KVA layout. There is no PCID mechanism available in protected mode, so each kernel/user switch forth and back completely flushes the TLB, except for the trampoline PTD region. The TLB invalidations for userspace becomes trivial, because IPI handlers switch page tables. On the other hand, context switches no longer need to reload %cr3. copyout(9) was rewritten to use vm_fault_quick_hold(). An issue for new copyout(9) is compatibility with wiring user buffers around sysctl handlers. This explains two kind of locks for copyout ptes and accounting of the vslock() calls. The vm_fault_quick_hold() AKA slow path, is only tried after the 'fast path' failed, which temporary changes mapping to the userspace and copies the data to/from small per-cpu buffer in the trampoline. If a page fault occurs during the copy, it is short-circuit by exception.s to not even reach C code. The change was motivated by the need to implement the Meltdown mitigation, but instead of KPTI the full split is done. The i386 architecture already shows the sizing problems, in particular, it is impossible to link clang and lld with debugging. I expect that the issues due to the virtual address space limits would only exaggerate and the split gives more liveness to the platform. Tested by: pho Discussed with: bde Sponsored by: The FreeBSD Foundation MFC after: 1 month Differential revision: https://reviews.freebsd.org/D14633
2018-04-13 20:30:49 +00:00
#ifdef __amd64__
#define WARMBOOT_OFF (KERNBASE + 0x0467)
#define WARMBOOT_SEG (KERNBASE + 0x0469)
i386 4/4G split. The change makes the user and kernel address spaces on i386 independent, giving each almost the full 4G of usable virtual addresses except for one PDE at top used for trampoline and per-CPU trampoline stacks, and system structures that must be always mapped, namely IDT, GDT, common TSS and LDT, and process-private TSS and LDT if allocated. By using 1:1 mapping for the kernel text and data, it appeared possible to eliminate assembler part of the locore.S which bootstraps initial page table and KPTmap. The code is rewritten in C and moved into the pmap_cold(). The comment in vmparam.h explains the KVA layout. There is no PCID mechanism available in protected mode, so each kernel/user switch forth and back completely flushes the TLB, except for the trampoline PTD region. The TLB invalidations for userspace becomes trivial, because IPI handlers switch page tables. On the other hand, context switches no longer need to reload %cr3. copyout(9) was rewritten to use vm_fault_quick_hold(). An issue for new copyout(9) is compatibility with wiring user buffers around sysctl handlers. This explains two kind of locks for copyout ptes and accounting of the vslock() calls. The vm_fault_quick_hold() AKA slow path, is only tried after the 'fast path' failed, which temporary changes mapping to the userspace and copies the data to/from small per-cpu buffer in the trampoline. If a page fault occurs during the copy, it is short-circuit by exception.s to not even reach C code. The change was motivated by the need to implement the Meltdown mitigation, but instead of KPTI the full split is done. The i386 architecture already shows the sizing problems, in particular, it is impossible to link clang and lld with debugging. I expect that the issues due to the virtual address space limits would only exaggerate and the split gives more liveness to the platform. Tested by: pho Discussed with: bde Sponsored by: The FreeBSD Foundation MFC after: 1 month Differential revision: https://reviews.freebsd.org/D14633
2018-04-13 20:30:49 +00:00
#else /* __i386__ */
#define WARMBOOT_OFF (PMAP_MAP_LOW + 0x0467)
#define WARMBOOT_SEG (PMAP_MAP_LOW + 0x0469)
#endif
#define CMOS_REG (0x70)
#define CMOS_DATA (0x71)
#define BIOS_RESET (0x0f)
#define BIOS_WARM (0x0a)
static void
acpi_wakeup_cpus(struct acpi_softc *sc)
{
uint32_t mpbioswarmvec;
int cpu;
u_char mpbiosreason;
/* save the current value of the warm-start vector */
mpbioswarmvec = *((uint32_t *)WARMBOOT_OFF);
outb(CMOS_REG, BIOS_RESET);
mpbiosreason = inb(CMOS_DATA);
/* setup a vector to our boot code */
*((volatile u_short *)WARMBOOT_OFF) = WARMBOOT_TARGET;
*((volatile u_short *)WARMBOOT_SEG) = sc->acpi_wakephys >> 4;
outb(CMOS_REG, BIOS_RESET);
outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */
/* Wake up each AP. */
for (cpu = 1; cpu < mp_ncpus; cpu++) {
if (!CPU_ISSET(cpu, &suspcpus))
continue;
if (acpi_wakeup_ap(sc, cpu) == 0) {
/* restore the warmstart vector */
*(uint32_t *)WARMBOOT_OFF = mpbioswarmvec;
panic("acpi_wakeup: failed to resume AP #%d (PHY #%d)",
cpu, cpu_apic_ids[cpu]);
}
}
Remove the permanent double mapping of low physical memory and replace it by a transient double mapping for the one instruction in ACPI wakeup where it is needed (and for many surrounding instructions in ACPI resume). Invalidate the TLB as soon as convenient after undoing the transient mapping. ACPI resume already has the strict ordering needed for this. This fixes the non-trapping of null pointers and other garbage pointers below NBPDR (except transiently). NBPDR is quite large (4MB, or 2MB for PAE). This fixes spurious traps at the first instruction in VM86 bioscalls. The traps are for transiently missing read permission in the first VM86 page (physical page 0) which was just written to at KERNBASE in the kernel. The mechanism is unknown (it is not simply PG_G). locore uses a similar but larger transient double mapping and needs it for 2 instructions instead of 1. Unmap the first PDE in it after the 2 instructions to detect most garbage pointers while bootstrapping. pmap_bootstrap() finishes the unmapping. Remove the avoidance of the double mapping for a recently fixed special case. ACPI resume could use this avoidance (made non-special) to avoid any problems with the transient double mapping, but no such problems are known. Update comments in locore. Many were for old versions of FreeBSD which tried to map low memory r/o except for special cases, or might have allowed access to low memory via physical offsets. Now all kernel maps are r/w, and removal of of the double map disallows use of physical offsets again.
2017-12-18 13:53:22 +00:00
#ifdef __i386__
/*
* Remove the identity mapping of low memory for all CPUs and sync
* the TLB for the BSP. The APs are now spinning in
* cpususpend_handler() and we will release them soon. Then each
* will invalidate its TLB.
*/
i386 4/4G split. The change makes the user and kernel address spaces on i386 independent, giving each almost the full 4G of usable virtual addresses except for one PDE at top used for trampoline and per-CPU trampoline stacks, and system structures that must be always mapped, namely IDT, GDT, common TSS and LDT, and process-private TSS and LDT if allocated. By using 1:1 mapping for the kernel text and data, it appeared possible to eliminate assembler part of the locore.S which bootstraps initial page table and KPTmap. The code is rewritten in C and moved into the pmap_cold(). The comment in vmparam.h explains the KVA layout. There is no PCID mechanism available in protected mode, so each kernel/user switch forth and back completely flushes the TLB, except for the trampoline PTD region. The TLB invalidations for userspace becomes trivial, because IPI handlers switch page tables. On the other hand, context switches no longer need to reload %cr3. copyout(9) was rewritten to use vm_fault_quick_hold(). An issue for new copyout(9) is compatibility with wiring user buffers around sysctl handlers. This explains two kind of locks for copyout ptes and accounting of the vslock() calls. The vm_fault_quick_hold() AKA slow path, is only tried after the 'fast path' failed, which temporary changes mapping to the userspace and copies the data to/from small per-cpu buffer in the trampoline. If a page fault occurs during the copy, it is short-circuit by exception.s to not even reach C code. The change was motivated by the need to implement the Meltdown mitigation, but instead of KPTI the full split is done. The i386 architecture already shows the sizing problems, in particular, it is impossible to link clang and lld with debugging. I expect that the issues due to the virtual address space limits would only exaggerate and the split gives more liveness to the platform. Tested by: pho Discussed with: bde Sponsored by: The FreeBSD Foundation MFC after: 1 month Differential revision: https://reviews.freebsd.org/D14633
2018-04-13 20:30:49 +00:00
PTD[KPTDI] = 0;
Remove the permanent double mapping of low physical memory and replace it by a transient double mapping for the one instruction in ACPI wakeup where it is needed (and for many surrounding instructions in ACPI resume). Invalidate the TLB as soon as convenient after undoing the transient mapping. ACPI resume already has the strict ordering needed for this. This fixes the non-trapping of null pointers and other garbage pointers below NBPDR (except transiently). NBPDR is quite large (4MB, or 2MB for PAE). This fixes spurious traps at the first instruction in VM86 bioscalls. The traps are for transiently missing read permission in the first VM86 page (physical page 0) which was just written to at KERNBASE in the kernel. The mechanism is unknown (it is not simply PG_G). locore uses a similar but larger transient double mapping and needs it for 2 instructions instead of 1. Unmap the first PDE in it after the 2 instructions to detect most garbage pointers while bootstrapping. pmap_bootstrap() finishes the unmapping. Remove the avoidance of the double mapping for a recently fixed special case. ACPI resume could use this avoidance (made non-special) to avoid any problems with the transient double mapping, but no such problems are known. Update comments in locore. Many were for old versions of FreeBSD which tried to map low memory r/o except for special cases, or might have allowed access to low memory via physical offsets. Now all kernel maps are r/w, and removal of of the double map disallows use of physical offsets again.
2017-12-18 13:53:22 +00:00
invltlb_glob();
#endif
/* restore the warmstart vector */
*(uint32_t *)WARMBOOT_OFF = mpbioswarmvec;
outb(CMOS_REG, BIOS_RESET);
outb(CMOS_DATA, mpbiosreason);
}
#endif
int
acpi_sleep_machdep(struct acpi_softc *sc, int state)
{
ACPI_STATUS status;
struct pcb *pcb;
#ifdef __amd64__
struct pcpu *pc;
int i;
#endif
if (sc->acpi_wakeaddr == 0ul)
return (-1); /* couldn't alloc wake memory */
#ifdef SMP
suspcpus = all_cpus;
CPU_CLR(PCPU_GET(cpuid), &suspcpus);
#endif
if (acpi_resume_beep != 0)
timer_spkr_acquire();
AcpiSetFirmwareWakingVector(sc->acpi_wakephys, 0);
intr_suspend();
pcb = &susppcbs[0]->sp_pcb;
if (savectx(pcb)) {
#ifdef __amd64__
fpususpend(susppcbs[0]->sp_fpususpend);
#else
npxsuspend(susppcbs[0]->sp_fpususpend);
#endif
#ifdef SMP
if (!CPU_EMPTY(&suspcpus) && suspend_cpus(suspcpus) == 0) {
Commit the support for removing cpumask_t and replacing it directly with cpuset_t objects. That is going to offer the underlying support for a simple bump of MAXCPU and then support for number of cpus > 32 (as it is today). Right now, cpumask_t is an int, 32 bits on all our supported architecture. cpumask_t on the other side is implemented as an array of longs, and easilly extendible by definition. The architectures touched by this commit are the following: - amd64 - i386 - pc98 - arm - ia64 - XEN while the others are still missing. Userland is believed to be fully converted with the changes contained here. Some technical notes: - This commit may be considered an ABI nop for all the architectures different from amd64 and ia64 (and sparc64 in the future) - per-cpu members, which are now converted to cpuset_t, needs to be accessed avoiding migration, because the size of cpuset_t should be considered unknown - size of cpuset_t objects is different from kernel and userland (this is primirally done in order to leave some more space in userland to cope with KBI extensions). If you need to access kernel cpuset_t from the userland please refer to example in this patch on how to do that correctly (kgdb may be a good source, for example). - Support for other architectures is going to be added soon - Only MAXCPU for amd64 is bumped now The patch has been tested by sbruno and Nicholas Esborn on opteron 4 x 12 pack CPUs. More testing on big SMP is expected to came soon. pluknet tested the patch with his 8-ways on both amd64 and i386. Tested by: pluknet, sbruno, gianni, Nicholas Esborn Reviewed by: jeff, jhb, sbruno
2011-05-05 14:39:14 +00:00
device_printf(sc->acpi_dev, "Failed to suspend APs\n");
return (0); /* couldn't sleep */
}
#endif
#ifdef __amd64__
hw_ibrs_active = 0;
hw_ssb_active = 0;
cpu_stdext_feature3 = 0;
CPU_FOREACH(i) {
pc = pcpu_find(i);
pc->pc_ibpb_set = 0;
}
#endif
WAKECODE_FIXUP(resume_beep, uint8_t, (acpi_resume_beep != 0));
WAKECODE_FIXUP(reset_video, uint8_t, (acpi_reset_video != 0));
#ifdef __amd64__
WAKECODE_FIXUP(wakeup_efer, uint64_t, rdmsr(MSR_EFER) &
~(EFER_LMA));
#else
WAKECODE_FIXUP(wakeup_cr4, register_t, pcb->pcb_cr4);
#endif
WAKECODE_FIXUP(wakeup_pcb, struct pcb *, pcb);
WAKECODE_FIXUP(wakeup_gdt, uint16_t, pcb->pcb_gdt.rd_limit);
WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t, pcb->pcb_gdt.rd_base);
Remove the permanent double mapping of low physical memory and replace it by a transient double mapping for the one instruction in ACPI wakeup where it is needed (and for many surrounding instructions in ACPI resume). Invalidate the TLB as soon as convenient after undoing the transient mapping. ACPI resume already has the strict ordering needed for this. This fixes the non-trapping of null pointers and other garbage pointers below NBPDR (except transiently). NBPDR is quite large (4MB, or 2MB for PAE). This fixes spurious traps at the first instruction in VM86 bioscalls. The traps are for transiently missing read permission in the first VM86 page (physical page 0) which was just written to at KERNBASE in the kernel. The mechanism is unknown (it is not simply PG_G). locore uses a similar but larger transient double mapping and needs it for 2 instructions instead of 1. Unmap the first PDE in it after the 2 instructions to detect most garbage pointers while bootstrapping. pmap_bootstrap() finishes the unmapping. Remove the avoidance of the double mapping for a recently fixed special case. ACPI resume could use this avoidance (made non-special) to avoid any problems with the transient double mapping, but no such problems are known. Update comments in locore. Many were for old versions of FreeBSD which tried to map low memory r/o except for special cases, or might have allowed access to low memory via physical offsets. Now all kernel maps are r/w, and removal of of the double map disallows use of physical offsets again.
2017-12-18 13:53:22 +00:00
#ifdef __i386__
/*
* Map some low memory with virt == phys for ACPI wakecode
* to use to jump to high memory after enabling paging. This
* is the same as for similar jump in locore, except the
* jump is a single instruction, and we know its address
* more precisely so only need a single PTD, and we have to
* be careful to use the kernel map (PTD[0] is for curthread
* which may be a user thread in deprecated APIs).
*/
i386 4/4G split. The change makes the user and kernel address spaces on i386 independent, giving each almost the full 4G of usable virtual addresses except for one PDE at top used for trampoline and per-CPU trampoline stacks, and system structures that must be always mapped, namely IDT, GDT, common TSS and LDT, and process-private TSS and LDT if allocated. By using 1:1 mapping for the kernel text and data, it appeared possible to eliminate assembler part of the locore.S which bootstraps initial page table and KPTmap. The code is rewritten in C and moved into the pmap_cold(). The comment in vmparam.h explains the KVA layout. There is no PCID mechanism available in protected mode, so each kernel/user switch forth and back completely flushes the TLB, except for the trampoline PTD region. The TLB invalidations for userspace becomes trivial, because IPI handlers switch page tables. On the other hand, context switches no longer need to reload %cr3. copyout(9) was rewritten to use vm_fault_quick_hold(). An issue for new copyout(9) is compatibility with wiring user buffers around sysctl handlers. This explains two kind of locks for copyout ptes and accounting of the vslock() calls. The vm_fault_quick_hold() AKA slow path, is only tried after the 'fast path' failed, which temporary changes mapping to the userspace and copies the data to/from small per-cpu buffer in the trampoline. If a page fault occurs during the copy, it is short-circuit by exception.s to not even reach C code. The change was motivated by the need to implement the Meltdown mitigation, but instead of KPTI the full split is done. The i386 architecture already shows the sizing problems, in particular, it is impossible to link clang and lld with debugging. I expect that the issues due to the virtual address space limits would only exaggerate and the split gives more liveness to the platform. Tested by: pho Discussed with: bde Sponsored by: The FreeBSD Foundation MFC after: 1 month Differential revision: https://reviews.freebsd.org/D14633
2018-04-13 20:30:49 +00:00
PTD[KPTDI] = PTD[LOWPTDI];
Remove the permanent double mapping of low physical memory and replace it by a transient double mapping for the one instruction in ACPI wakeup where it is needed (and for many surrounding instructions in ACPI resume). Invalidate the TLB as soon as convenient after undoing the transient mapping. ACPI resume already has the strict ordering needed for this. This fixes the non-trapping of null pointers and other garbage pointers below NBPDR (except transiently). NBPDR is quite large (4MB, or 2MB for PAE). This fixes spurious traps at the first instruction in VM86 bioscalls. The traps are for transiently missing read permission in the first VM86 page (physical page 0) which was just written to at KERNBASE in the kernel. The mechanism is unknown (it is not simply PG_G). locore uses a similar but larger transient double mapping and needs it for 2 instructions instead of 1. Unmap the first PDE in it after the 2 instructions to detect most garbage pointers while bootstrapping. pmap_bootstrap() finishes the unmapping. Remove the avoidance of the double mapping for a recently fixed special case. ACPI resume could use this avoidance (made non-special) to avoid any problems with the transient double mapping, but no such problems are known. Update comments in locore. Many were for old versions of FreeBSD which tried to map low memory r/o except for special cases, or might have allowed access to low memory via physical offsets. Now all kernel maps are r/w, and removal of of the double map disallows use of physical offsets again.
2017-12-18 13:53:22 +00:00
#endif
/* Call ACPICA to enter the desired sleep state */
if (state == ACPI_STATE_S4 && sc->acpi_s4bios)
status = AcpiEnterSleepStateS4bios();
else
2012-08-16 20:54:52 +00:00
status = AcpiEnterSleepState(state);
if (ACPI_FAILURE(status)) {
device_printf(sc->acpi_dev,
"AcpiEnterSleepState failed - %s\n",
AcpiFormatException(status));
return (0); /* couldn't sleep */
}
if (acpi_susp_bounce)
resumectx(pcb);
for (;;)
ia32_pause();
} else {
/*
* Re-initialize console hardware as soon as possibe.
* No console output (e.g. printf) is allowed before
* this point.
*/
cnresume();
#ifdef __amd64__
fpuresume(susppcbs[0]->sp_fpususpend);
#else
npxresume(susppcbs[0]->sp_fpususpend);
#endif
}
return (1); /* wakeup successfully */
}
int
acpi_wakeup_machdep(struct acpi_softc *sc, int state, int sleep_result,
int intr_enabled)
{
if (sleep_result == -1)
return (sleep_result);
if (!intr_enabled) {
/* Wakeup MD procedures in interrupt disabled context */
if (sleep_result == 1) {
ucode_reload();
pmap_init_pat();
initializecpu();
PCPU_SET(switchtime, 0);
PCPU_SET(switchticks, ticks);
#ifdef DEV_APIC
lapic_xapic_mode();
#endif
#ifdef SMP
if (!CPU_EMPTY(&suspcpus))
acpi_wakeup_cpus(sc);
#endif
}
#ifdef SMP
if (!CPU_EMPTY(&suspcpus))
Use resume_cpus() instead of restart_cpus() to resume from ACPI suspension. restart_cpus() worked well enough by accident. Before this set of fixes, resume_cpus() used the same cpuset (started_cpus, meaning CPUs directed to restart) as restart_cpus(). resume_cpus() waited for the wrong cpuset (stopped_cpus) to become empty, but since mixtures of stopped and suspended CPUs are not close to working, stopped_cpus must be empty when resuming so the wait is null -- restart_cpus just allows the other CPUs to restart and returns without waiting. Fix resume_cpus() to wait on a non-wrong cpuset for the ACPI case, and add further kludges to try to keep it working for the XEN case. It was only used for XEN. It waited on suspended_cpus. This works for XEN. However, for ACPI, resuming is a 2-step process. ACPI has already woken up the other CPUs and removed them from suspended_cpus. This fix records the move by putting them in a new cpuset resuming_cpus. Waiting on suspended_cpus would give the same null wait as waiting on stopped_cpus. Wait on resuming_cpus instead. Add a cpuset toresume_cpus to map the CPUs being told to resume to keep this separate from the cpuset started_cpus for mapping the CPUs being told to restart. Mixtures of stopped and suspended/resuming CPUs are still far from working. Describe new and some old cpusets in comments. Add further kludges to cpususpend_handler() to try to avoid breaking it for XEN. XEN doesn't use resumectx(), so it doesn't use the second return path for savectx(), and it goes from the suspended state directly to the restarted state, while ACPI resume goes through the resuming state. Enter the resuming state early for all cases so that resume_cpus can test for being in this state and not have to worry about the intermediate !suspended state for ACPI only. Reviewed by: kib
2017-12-21 09:17:48 +00:00
resume_cpus(suspcpus);
#endif
mca_resume();
#ifdef __amd64__
if (vmm_resume_p != NULL)
vmm_resume_p();
#endif
Add support for suspend/resume/migration operations when running as a Xen PVHVM guest. Submitted by: Roger Pau Monné Sponsored by: Citrix Systems R&D Reviewed by: gibbs Approved by: re (blanket Xen) MFC after: 2 weeks sys/amd64/amd64/mp_machdep.c: sys/i386/i386/mp_machdep.c: - Make sure that are no MMU related IPIs pending on migration. - Reset pending IPI_BITMAP on resume. - Init vcpu_info on resume. sys/amd64/include/intr_machdep.h: sys/i386/include/intr_machdep.h: sys/x86/acpica/acpi_wakeup.c: sys/x86/x86/intr_machdep.c: sys/x86/isa/atpic.c: sys/x86/x86/io_apic.c: sys/x86/x86/local_apic.c: - Add a "suspend_cancelled" parameter to pic_resume(). For the Xen PIC, restoration of interrupt services differs between the aborted suspend and normal resume cases, so we must provide this information. sys/dev/acpica/acpi_timer.c: sys/dev/xen/timer/timer.c: sys/timetc.h: - Don't swap out "suspend safe" timers across a suspend/resume cycle. This includes the Xen PV and ACPI timers. sys/dev/xen/control/control.c: - Perform proper suspend/resume process for PVHVM: - Suspend all APs before going into suspension, this allows us to reset the vcpu_info on resume for each AP. - Reset shared info page and callback on resume. sys/dev/xen/timer/timer.c: - Implement suspend/resume support for the PV timer. Since FreeBSD doesn't perform a per-cpu resume of the timer, we need to call smp_rendezvous in order to correctly resume the timer on each CPU. sys/dev/xen/xenpci/xenpci.c: - Don't reset the PCI interrupt on each suspend/resume. sys/kern/subr_smp.c: - When suspending a PVHVM domain make sure there are no MMU IPIs in-flight, or we will get a lockup on resume due to the fact that pending event channels are not carried over on migration. - Implement a generic version of restart_cpus that can be used by suspended and stopped cpus. sys/x86/xen/hvm.c: - Implement resume support for the hypercall page and shared info. - Clear vcpu_info so it can be reset by APs when resuming from suspension. sys/dev/xen/xenpci/xenpci.c: sys/x86/xen/hvm.c: sys/x86/xen/xen_intr.c: - Support UP kernel configurations. sys/x86/xen/xen_intr.c: - Properly rebind per-cpus VIRQs and IPIs on resume.
2013-09-20 05:06:03 +00:00
intr_resume(/*suspend_cancelled*/false);
2015-06-18 23:14:45 +00:00
AcpiSetFirmwareWakingVector(0, 0);
} else {
/* Wakeup MD procedures in interrupt enabled context */
if (sleep_result == 1 && mem_range_softc.mr_op != NULL &&
mem_range_softc.mr_op->reinit != NULL)
mem_range_softc.mr_op->reinit(&mem_range_softc);
}
return (sleep_result);
}
static void *
acpi_alloc_wakeup_handler(void *wakepages[ACPI_WAKEPAGES])
{
int i;
memset(wakepages, 0, ACPI_WAKEPAGES * sizeof(*wakepages));
/*
* Specify the region for our wakeup code. We want it in the low 1 MB
* region, excluding real mode IVT (0-0x3ff), BDA (0x400-0x4ff), EBDA
* (less than 128KB, below 0xa0000, must be excluded by SMAP and DSDT),
* and ROM area (0xa0000 and above). The temporary page tables must be
* page-aligned.
*/
for (i = 0; i < ACPI_WAKEPAGES; i++) {
wakepages[i] = contigmalloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT,
0x500, 0xa0000, PAGE_SIZE, 0ul);
if (wakepages[i] == NULL) {
printf("%s: can't alloc wake memory\n", __func__);
goto freepages;
}
}
if (EVENTHANDLER_REGISTER(power_resume, acpi_stop_beep, NULL,
EVENTHANDLER_PRI_LAST) == NULL) {
printf("%s: can't register event handler\n", __func__);
goto freepages;
}
susppcbs = malloc(mp_ncpus * sizeof(*susppcbs), M_DEVBUF, M_WAITOK);
Add support for the extended FPU states on amd64, both for native 64bit and 32bit ABIs. As a side-effect, it enables AVX on capable CPUs. In particular: - Query the CPU support for XSAVE, list of the supported extensions and the required size of FPU save area. The hw.use_xsave tunable is provided for disabling XSAVE, and hw.xsave_mask may be used to select the enabled extensions. - Remove the FPU save area from PCB and dynamically allocate the (run-time sized) user save area on the top of the kernel stack, right above the PCB. Reorganize the thread0 PCB initialization to postpone it after BSP is queried for save area size. - The dumppcb, stoppcbs and susppcbs now do not carry the FPU state as well. FPU state is only useful for suspend, where it is saved in dynamically allocated suspfpusave area. - Use XSAVE and XRSTOR to save/restore FPU state, if supported and enabled. - Define new mcontext_t flag _MC_HASFPXSTATE, indicating that mcontext_t has a valid pointer to out-of-struct extended FPU state. Signal handlers are supplied with stack-allocated fpu state. The sigreturn(2) and setcontext(2) syscall honour the flag, allowing the signal handlers to inspect and manipilate extended state in the interrupted context. - The getcontext(2) never returns extended state, since there is no place in the fixed-sized mcontext_t to place variable-sized save area. And, since mcontext_t is embedded into ucontext_t, makes it impossible to fix in a reasonable way. Instead of extending getcontext(2) syscall, provide a sysarch(2) facility to query extended FPU state. - Add ptrace(2) support for getting and setting extended state; while there, implement missed PT_I386_{GET,SET}XMMREGS for 32bit binaries. - Change fpu_kern KPI to not expose struct fpu_kern_ctx layout to consumers, making it opaque. Internally, struct fpu_kern_ctx now contains a space for the extended state. Convert in-kernel consumers of fpu_kern KPI both on i386 and amd64. First version of the support for AVX was submitted by Tim Bird <tim.bird am sony com> on behalf of Sony. This version was written from scratch. Tested by: pho (previous version), Yamagi Burmeister <lists yamagi org> MFC after: 1 month
2012-01-21 17:45:27 +00:00
for (i = 0; i < mp_ncpus; i++) {
susppcbs[i] = malloc(sizeof(**susppcbs), M_DEVBUF, M_WAITOK);
susppcbs[i]->sp_fpususpend = alloc_fpusave(M_WAITOK);
Add support for the extended FPU states on amd64, both for native 64bit and 32bit ABIs. As a side-effect, it enables AVX on capable CPUs. In particular: - Query the CPU support for XSAVE, list of the supported extensions and the required size of FPU save area. The hw.use_xsave tunable is provided for disabling XSAVE, and hw.xsave_mask may be used to select the enabled extensions. - Remove the FPU save area from PCB and dynamically allocate the (run-time sized) user save area on the top of the kernel stack, right above the PCB. Reorganize the thread0 PCB initialization to postpone it after BSP is queried for save area size. - The dumppcb, stoppcbs and susppcbs now do not carry the FPU state as well. FPU state is only useful for suspend, where it is saved in dynamically allocated suspfpusave area. - Use XSAVE and XRSTOR to save/restore FPU state, if supported and enabled. - Define new mcontext_t flag _MC_HASFPXSTATE, indicating that mcontext_t has a valid pointer to out-of-struct extended FPU state. Signal handlers are supplied with stack-allocated fpu state. The sigreturn(2) and setcontext(2) syscall honour the flag, allowing the signal handlers to inspect and manipilate extended state in the interrupted context. - The getcontext(2) never returns extended state, since there is no place in the fixed-sized mcontext_t to place variable-sized save area. And, since mcontext_t is embedded into ucontext_t, makes it impossible to fix in a reasonable way. Instead of extending getcontext(2) syscall, provide a sysarch(2) facility to query extended FPU state. - Add ptrace(2) support for getting and setting extended state; while there, implement missed PT_I386_{GET,SET}XMMREGS for 32bit binaries. - Change fpu_kern KPI to not expose struct fpu_kern_ctx layout to consumers, making it opaque. Internally, struct fpu_kern_ctx now contains a space for the extended state. Convert in-kernel consumers of fpu_kern KPI both on i386 and amd64. First version of the support for AVX was submitted by Tim Bird <tim.bird am sony com> on behalf of Sony. This version was written from scratch. Tested by: pho (previous version), Yamagi Burmeister <lists yamagi org> MFC after: 1 month
2012-01-21 17:45:27 +00:00
}
return (wakepages);
freepages:
for (i = 0; i < ACPI_WAKEPAGES; i++)
if (wakepages[i] != NULL)
contigfree(wakepages[i], PAGE_SIZE, M_DEVBUF);
return (NULL);
}
void
acpi_install_wakeup_handler(struct acpi_softc *sc)
{
static void *wakeaddr;
void *wakepages[ACPI_WAKEPAGES];
#ifdef __amd64__
uint64_t *pt4, *pt3, *pt2;
vm_paddr_t pt4pa, pt3pa, pt2pa;
int i;
#endif
if (wakeaddr != NULL)
return;
if (acpi_alloc_wakeup_handler(wakepages) == NULL)
return;
wakeaddr = wakepages[0];
sc->acpi_wakeaddr = (vm_offset_t)wakeaddr;
sc->acpi_wakephys = vtophys(wakeaddr);
#ifdef __amd64__
pt4 = wakepages[1];
pt3 = wakepages[2];
pt2 = wakepages[3];
pt4pa = vtophys(pt4);
pt3pa = vtophys(pt3);
pt2pa = vtophys(pt2);
#endif
bcopy(wakecode, (void *)sc->acpi_wakeaddr, sizeof(wakecode));
/* Patch GDT base address, ljmp targets. */
WAKECODE_FIXUP((bootgdtdesc + 2), uint32_t,
sc->acpi_wakephys + bootgdt);
WAKECODE_FIXUP((wakeup_sw32 + 2), uint32_t,
sc->acpi_wakephys + wakeup_32);
#ifdef __amd64__
WAKECODE_FIXUP((wakeup_sw64 + 1), uint32_t,
sc->acpi_wakephys + wakeup_64);
WAKECODE_FIXUP(wakeup_pagetables, uint32_t, pt4pa);
#endif
/* Save pointers to some global data. */
WAKECODE_FIXUP(wakeup_ret, void *, resumectx);
#ifndef __amd64__
#if defined(PAE) || defined(PAE_TABLES)
WAKECODE_FIXUP(wakeup_cr3, register_t, vtophys(kernel_pmap->pm_pdpt));
#else
WAKECODE_FIXUP(wakeup_cr3, register_t, vtophys(kernel_pmap->pm_pdir));
#endif
#else /* __amd64__ */
/* Create the initial 1GB replicated page tables */
for (i = 0; i < 512; i++) {
/*
* Each slot of the level 4 pages points
* to the same level 3 page
*/
pt4[i] = (uint64_t)pt3pa;
pt4[i] |= PG_V | PG_RW | PG_U;
/*
* Each slot of the level 3 pages points
* to the same level 2 page
*/
pt3[i] = (uint64_t)pt2pa;
pt3[i] |= PG_V | PG_RW | PG_U;
/* The level 2 page slots are mapped with 2MB pages for 1GB. */
pt2[i] = i * (2 * 1024 * 1024);
pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
}
#endif /* !__amd64__ */
if (bootverbose)
device_printf(sc->acpi_dev, "wakeup code va %#jx pa %#jx\n",
(uintmax_t)sc->acpi_wakeaddr, (uintmax_t)sc->acpi_wakephys);
}