freebsd-dev/sys/x86/include/x86_var.h

148 lines
4.4 KiB
C
Raw Normal View History

/*-
* Copyright (c) 1995 Bruce D. Evans.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the author nor the names of contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _X86_X86_VAR_H_
#define _X86_X86_VAR_H_
/*
* Miscellaneous machine-dependent declarations.
*/
extern long Maxmem;
extern u_int basemem;
extern int busdma_swi_pending;
extern u_int cpu_exthigh;
extern u_int cpu_feature;
extern u_int cpu_feature2;
extern u_int amd_feature;
extern u_int amd_feature2;
extern u_int amd_rascap;
extern u_int amd_pminfo;
extern u_int amd_extended_feature_extensions;
extern u_int via_feature_rng;
extern u_int via_feature_xcrypt;
extern u_int cpu_clflush_line_size;
extern u_int cpu_stdext_feature;
extern u_int cpu_stdext_feature2;
extern u_int cpu_stdext_feature3;
extern uint64_t cpu_ia32_arch_caps;
extern u_int cpu_fxsr;
extern u_int cpu_high;
extern u_int cpu_id;
extern u_int cpu_max_ext_state_size;
extern u_int cpu_mxcsr_mask;
extern u_int cpu_procinfo;
extern u_int cpu_procinfo2;
extern char cpu_vendor[];
extern u_int cpu_vendor_id;
extern u_int cpu_mon_mwait_flags;
extern u_int cpu_mon_min_size;
extern u_int cpu_mon_max_size;
extern u_int cpu_maxphyaddr;
extern char ctx_switch_xsave[];
extern u_int hv_high;
extern char hv_vendor[];
extern char kstack[];
extern char sigcode[];
extern int szsigcode;
extern int vm_page_dump_size;
extern int workaround_erratum383;
extern int _udatasel;
extern int _ucodesel;
extern int _ucode32sel;
extern int _ufssel;
extern int _ugssel;
extern int use_xsave;
extern uint64_t xsave_mask;
extern u_int max_apic_id;
PTI for amd64. The implementation of the Kernel Page Table Isolation (KPTI) for amd64, first version. It provides a workaround for the 'meltdown' vulnerability. PTI is turned off by default for now, enable with the loader tunable vm.pmap.pti=1. The pmap page table is split into kernel-mode table and user-mode table. Kernel-mode table is identical to the non-PTI table, while usermode table is obtained from kernel table by leaving userspace mappings intact, but only leaving the following parts of the kernel mapped: kernel text (but not modules text) PCPU GDT/IDT/user LDT/task structures IST stacks for NMI and doublefault handlers. Kernel switches to user page table before returning to usermode, and restores full kernel page table on the entry. Initial kernel-mode stack for PTI trampoline is allocated in PCPU, it is only 16 qwords. Kernel entry trampoline switches page tables. then the hardware trap frame is copied to the normal kstack, and execution continues. IST stacks are kept mapped and no trampoline is needed for NMI/doublefault, but of course page table switch is performed. On return to usermode, the trampoline is used again, iret frame is copied to the trampoline stack, page tables are switched and iretq is executed. The case of iretq faulting due to the invalid usermode context is tricky, since the frame for fault is appended to the trampoline frame. Besides copying the fault frame and original (corrupted) frame to kstack, the fault frame must be patched to make it look as if the fault occured on the kstack, see the comment in doret_iret detection code in trap(). Currently kernel pages which are mapped during trampoline operation are identical for all pmaps. They are registered using pmap_pti_add_kva(). Besides initial registrations done during boot, LDT and non-common TSS segments are registered if user requested their use. In principle, they can be installed into kernel page table per pmap with some work. Similarly, PCPU can be hidden from userspace mapping using trampoline PCPU page, but again I do not see much benefits besides complexity. PDPE pages for the kernel half of the user page tables are pre-allocated during boot because we need to know pml4 entries which are copied to the top-level paging structure page, in advance on a new pmap creation. I enforce this to avoid iterating over the all existing pmaps if a new PDPE page is needed for PTI kernel mappings. The iteration is a known problematic operation on i386. The need to flush hidden kernel translations on the switch to user mode make global tables (PG_G) meaningless and even harming, so PG_G use is disabled for PTI case. Our existing use of PCID is incompatible with PTI and is automatically disabled if PTI is enabled. PCID can be forced on only for developer's benefit. MCE is known to be broken, it requires IST stack to operate completely correctly even for non-PTI case, and absolutely needs dedicated IST stack because MCE delivery while trampoline did not switched from PTI stack is fatal. The fix is pending. Reviewed by: markj (partially) Tested by: pho (previous version) Discussed with: jeff, jhb Sponsored by: The FreeBSD Foundation MFC after: 2 weeks
2018-01-17 11:44:21 +00:00
extern int pti;
struct pcb;
struct thread;
struct reg;
struct fpreg;
struct dbreg;
struct dumperinfo;
Handle broadcast NMIs. On several Intel chipsets, diagnostic NMIs sent from BMC or NMIs reporting hardware errors are broadcasted to all CPUs. When kernel is configured to enter kdb on NMI, the outcome is problematic, because each CPU tries to enter kdb. All CPUs are executing NMI handlers, which set the latches disabling the nested NMI delivery; this means that stop_cpus_hard(), used by kdb_enter() to stop other cpus by broadcasting IPI_STOP_HARD NMI, cannot work. One indication of this is the harmless but annoying diagnostic "timeout stopping cpus". Much more harming behaviour is that because all CPUs try to enter kdb, and if ddb is used as debugger, all CPUs issue prompt on console and race for the input, not to mention the simultaneous use of the ddb shared state. Try to fix this by introducing a pseudo-lock for simultaneous attempts to handle NMIs. If one core happens to enter NMI trap handler, other cores see it and simulate reception of the IPI_STOP_HARD. More, generic_stop_cpus() avoids sending IPI_STOP_HARD and avoids waiting for the acknowledgement, relying on the nmi handler on other cores suspending and then restarting the CPU. Since it is impossible to detect at runtime whether some stray NMI is broadcast or unicast, add a knob for administrator (really developer) to configure debugging NMI handling mode. The updated patch was debugged with the help from Andrey Gapon (avg) and discussed with him. Sponsored by: The FreeBSD Foundation MFC after: 2 weeks Differential revision: https://reviews.freebsd.org/D8249
2016-10-24 16:40:27 +00:00
struct trapframe;
/*
* The interface type of the interrupt handler entry point cannot be
* expressed in C. Use simplest non-variadic function type as an
* approximation.
*/
typedef void alias_for_inthand_t(void);
/*
* Returns the maximum physical address that can be used with the
* current system.
*/
static __inline vm_paddr_t
cpu_getmaxphyaddr(void)
{
#if defined(__i386__) && !defined(PAE)
return (0xffffffff);
#else
return ((1ULL << cpu_maxphyaddr) - 1);
#endif
}
void *alloc_fpusave(int flags);
void busdma_swi(void);
bool cpu_mwait_usable(void);
void cpu_probe_amdc1e(void);
void cpu_setregs(void);
void dump_add_page(vm_paddr_t);
void dump_drop_page(vm_paddr_t);
void finishidentcpu(void);
void identify_cpu1(void);
void identify_cpu2(void);
void identify_hypervisor(void);
void initializecpu(void);
void initializecpucache(void);
bool fix_cpuid(void);
void fillw(int /*u_short*/ pat, void *base, size_t cnt);
int is_physical_memory(vm_paddr_t addr);
int isa_nmi(int cd);
void handle_ibrs_entry(void);
void handle_ibrs_exit(void);
void hw_ibrs_recalculate(void);
void nmi_call_kdb(u_int cpu, u_int type, struct trapframe *frame);
void nmi_call_kdb_smp(u_int type, struct trapframe *frame);
void nmi_handle_intr(u_int type, struct trapframe *frame);
void pagecopy(void *from, void *to);
void printcpuinfo(void);
int pti_get_default(void);
int user_dbreg_trap(void);
int minidumpsys(struct dumperinfo *);
struct pcb *get_pcb_td(struct thread *td);
#endif