PTI for amd64.
The implementation of Kernel Page Table Isolation (KPTI) for amd64, first version. It provides a workaround for the 'meltdown' vulnerability. PTI is turned off by default for now; enable with the loader tunable vm.pmap.pti=1.

The pmap page table is split into a kernel-mode table and a user-mode table. The kernel-mode table is identical to the non-PTI table, while the user-mode table is obtained from the kernel table by keeping the userspace mappings intact but leaving only the following parts of the kernel mapped:

    kernel text (but not modules text)
    PCPU
    GDT/IDT/user LDT/task structures
    IST stacks for NMI and doublefault handlers.

The kernel switches to the user page table before returning to usermode, and restores the full kernel page table on entry. The initial kernel-mode stack for the PTI trampoline is allocated in PCPU; it is only 16 qwords. The kernel entry trampoline switches page tables, then the hardware trap frame is copied to the normal kstack and execution continues.

IST stacks are kept mapped and no trampoline is needed for NMI/doublefault, but a page table switch is of course still performed.

On return to usermode the trampoline is used again: the iret frame is copied to the trampoline stack, page tables are switched and iretq is executed. The case of iretq faulting due to an invalid usermode context is tricky, since the frame for the fault is appended to the trampoline frame. Besides copying the fault frame and the original (corrupted) frame to the kstack, the fault frame must be patched to make it look as if the fault occurred on the kstack; see the comment in the doreti_iret detection code in trap().

Currently the kernel pages which are mapped during trampoline operation are identical for all pmaps. They are registered using pmap_pti_add_kva(). Besides the initial registrations done during boot, LDT and non-common TSS segments are registered if the user requested their use. In principle, they can be installed into the kernel page table per pmap with some work. Similarly, PCPU can be hidden from the userspace mapping using a trampoline PCPU page, but again I do not see much benefit besides the added complexity.

PDPE pages for the kernel half of the user page tables are pre-allocated during boot because we need to know, in advance of a new pmap creation, the pml4 entries which are copied to the top-level paging structure page. I enforce this to avoid iterating over all existing pmaps if a new PDPE page is needed for the PTI kernel mappings. That iteration is a known problematic operation on i386.

The need to flush hidden kernel translations on the switch to user mode makes global tables (PG_G) meaningless and even harmful, so PG_G use is disabled in the PTI case. Our existing use of PCID is incompatible with PTI and is automatically disabled if PTI is enabled. PCID can be forced on only for the developer's benefit.

MCE is known to be broken: it requires an IST stack to operate completely correctly even in the non-PTI case, and it absolutely needs a dedicated IST stack because an MCE delivered while the trampoline has not yet switched away from the PTI stack is fatal. The fix is pending.

Reviewed by:	markj (partially)
Tested by:	pho (previous version)
Discussed with:	jeff, jhb
Sponsored by:	The FreeBSD Foundation
MFC after:	2 weeks
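For reference, a minimal sketch of enabling and checking the feature, assuming only the knob names visible in this change (the loader tunable and the read-only sysctl are both vm.pmap.pti):

    # /boot/loader.conf
    vm.pmap.pti=1

    # after reboot, the read-only sysctl reports the effective setting
    sysctl vm.pmap.pti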
parent 72f9a98571
commit c35d24e497
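As an illustration of the registration interface mentioned in the commit message (pmap_pti_add_kva()/pmap_pti_remove_kva(), introduced in the pmap.c part of this diff), a hypothetical caller might look like the sketch below. This is not code from the commit; the chosen headers, the round_page() usage and the example_* names are assumptions for illustration only.

	/*
	 * Hypothetical sketch: a subsystem that allocates a kernel data
	 * structure which must remain mapped while the CPU runs on the
	 * user (PTI) page table registers its KVA range, and removes the
	 * registration again before freeing the memory.
	 */
	#include <sys/param.h>		/* round_page() */
	#include <vm/vm.h>
	#include <vm/pmap.h>		/* assumed home of the PTI KVA API */

	static void
	example_pti_register(vm_offset_t start, vm_size_t len)
	{

		/* Expose [start, start + len) read/write, non-executable. */
		pmap_pti_add_kva(start, round_page(start + len), false);
	}

	static void
	example_pti_unregister(vm_offset_t start, vm_size_t len)
	{

		pmap_pti_remove_kva(start, round_page(start + len));
	}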
@ -2,6 +2,12 @
* Copyright (c) 1989, 1990 William F. Jolitz.
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2014-2018 The FreeBSD Foundation
* All rights reserved.
*
* Portions of this software were developed by
* Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -38,12 +44,12 @

#include "opt_smp.h"

#include "assym.s"

#include <machine/asmacros.h>
#include <machine/specialreg.h>
#include <x86/apicreg.h>

#include "assym.s"

#ifdef SMP
#define LK lock ;
#else
@ -73,30 +79,28 @@ as_lapic_eoi:
|
||||
* translates that into a vector, and passes the vector to the
|
||||
* lapic_handle_intr() function.
|
||||
*/
|
||||
#define ISR_VEC(index, vec_name) \
|
||||
.text ; \
|
||||
SUPERALIGN_TEXT ; \
|
||||
IDTVEC(vec_name) ; \
|
||||
PUSH_FRAME ; \
|
||||
FAKE_MCOUNT(TF_RIP(%rsp)) ; \
|
||||
cmpl $0,x2apic_mode ; \
|
||||
je 1f ; \
|
||||
movl $(MSR_APIC_ISR0 + index),%ecx ; \
|
||||
rdmsr ; \
|
||||
jmp 2f ; \
|
||||
1: ; \
|
||||
movq lapic_map, %rdx ; /* pointer to local APIC */ \
|
||||
movl LA_ISR + 16 * (index)(%rdx), %eax ; /* load ISR */ \
|
||||
2: ; \
|
||||
bsrl %eax, %eax ; /* index of highest set bit in ISR */ \
|
||||
jz 3f ; \
|
||||
addl $(32 * index),%eax ; \
|
||||
movq %rsp, %rsi ; \
|
||||
movl %eax, %edi ; /* pass the IRQ */ \
|
||||
call lapic_handle_intr ; \
|
||||
3: ; \
|
||||
MEXITCOUNT ; \
|
||||
.macro ISR_VEC index, vec_name
|
||||
INTR_HANDLER \vec_name
|
||||
FAKE_MCOUNT(TF_RIP(%rsp))
|
||||
cmpl $0,x2apic_mode
|
||||
je 1f
|
||||
movl $(MSR_APIC_ISR0 + \index),%ecx
|
||||
rdmsr
|
||||
jmp 2f
|
||||
1:
|
||||
movq lapic_map, %rdx /* pointer to local APIC */
|
||||
movl LA_ISR + 16 * (\index)(%rdx), %eax /* load ISR */
|
||||
2:
|
||||
bsrl %eax, %eax /* index of highest set bit in ISR */
|
||||
jz 3f
|
||||
addl $(32 * \index),%eax
|
||||
movq %rsp, %rsi
|
||||
movl %eax, %edi /* pass the IRQ */
|
||||
call lapic_handle_intr
|
||||
3:
|
||||
MEXITCOUNT
|
||||
jmp doreti
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Handle "spurious INTerrupts".
|
||||
@ -108,26 +112,21 @@ IDTVEC(vec_name) ; \
|
||||
.text
|
||||
SUPERALIGN_TEXT
|
||||
IDTVEC(spuriousint)
|
||||
|
||||
/* No EOI cycle used here */
|
||||
|
||||
jmp doreti_iret
|
||||
|
||||
ISR_VEC(1, apic_isr1)
|
||||
ISR_VEC(2, apic_isr2)
|
||||
ISR_VEC(3, apic_isr3)
|
||||
ISR_VEC(4, apic_isr4)
|
||||
ISR_VEC(5, apic_isr5)
|
||||
ISR_VEC(6, apic_isr6)
|
||||
ISR_VEC(7, apic_isr7)
|
||||
ISR_VEC 1, apic_isr1
|
||||
ISR_VEC 2, apic_isr2
|
||||
ISR_VEC 3, apic_isr3
|
||||
ISR_VEC 4, apic_isr4
|
||||
ISR_VEC 5, apic_isr5
|
||||
ISR_VEC 6, apic_isr6
|
||||
ISR_VEC 7, apic_isr7
|
||||
|
||||
/*
|
||||
* Local APIC periodic timer handler.
|
||||
*/
|
||||
.text
|
||||
SUPERALIGN_TEXT
|
||||
IDTVEC(timerint)
|
||||
PUSH_FRAME
|
||||
INTR_HANDLER timerint
|
||||
FAKE_MCOUNT(TF_RIP(%rsp))
|
||||
movq %rsp, %rdi
|
||||
call lapic_handle_timer
|
||||
@ -137,10 +136,7 @@ IDTVEC(timerint)
|
||||
/*
|
||||
* Local APIC CMCI handler.
|
||||
*/
|
||||
.text
|
||||
SUPERALIGN_TEXT
|
||||
IDTVEC(cmcint)
|
||||
PUSH_FRAME
|
||||
INTR_HANDLER cmcint
|
||||
FAKE_MCOUNT(TF_RIP(%rsp))
|
||||
call lapic_handle_cmc
|
||||
MEXITCOUNT
|
||||
@ -149,10 +145,7 @@ IDTVEC(cmcint)
|
||||
/*
|
||||
* Local APIC error interrupt handler.
|
||||
*/
|
||||
.text
|
||||
SUPERALIGN_TEXT
|
||||
IDTVEC(errorint)
|
||||
PUSH_FRAME
|
||||
INTR_HANDLER errorint
|
||||
FAKE_MCOUNT(TF_RIP(%rsp))
|
||||
call lapic_handle_error
|
||||
MEXITCOUNT
|
||||
@ -163,10 +156,7 @@ IDTVEC(errorint)
|
||||
* Xen event channel upcall interrupt handler.
|
||||
* Only used when the hypervisor supports direct vector callbacks.
|
||||
*/
|
||||
.text
|
||||
SUPERALIGN_TEXT
|
||||
IDTVEC(xen_intr_upcall)
|
||||
PUSH_FRAME
|
||||
INTR_HANDLER xen_intr_upcall
|
||||
FAKE_MCOUNT(TF_RIP(%rsp))
|
||||
movq %rsp, %rdi
|
||||
call xen_intr_handle_upcall
|
||||
@ -183,74 +173,48 @@ IDTVEC(xen_intr_upcall)
|
||||
SUPERALIGN_TEXT
|
||||
invltlb_ret:
|
||||
call as_lapic_eoi
|
||||
POP_FRAME
|
||||
jmp doreti_iret
|
||||
jmp ld_regs
|
||||
|
||||
SUPERALIGN_TEXT
|
||||
IDTVEC(invltlb)
|
||||
PUSH_FRAME
|
||||
|
||||
INTR_HANDLER invltlb
|
||||
call invltlb_handler
|
||||
jmp invltlb_ret
|
||||
|
||||
IDTVEC(invltlb_pcid)
|
||||
PUSH_FRAME
|
||||
|
||||
INTR_HANDLER invltlb_pcid
|
||||
call invltlb_pcid_handler
|
||||
jmp invltlb_ret
|
||||
|
||||
IDTVEC(invltlb_invpcid)
|
||||
PUSH_FRAME
|
||||
|
||||
INTR_HANDLER invltlb_invpcid
|
||||
call invltlb_invpcid_handler
|
||||
jmp invltlb_ret
|
||||
|
||||
/*
|
||||
* Single page TLB shootdown
|
||||
*/
|
||||
.text
|
||||
|
||||
SUPERALIGN_TEXT
|
||||
IDTVEC(invlpg)
|
||||
PUSH_FRAME
|
||||
|
||||
INTR_HANDLER invlpg
|
||||
call invlpg_handler
|
||||
jmp invltlb_ret
|
||||
|
||||
/*
|
||||
* Page range TLB shootdown.
|
||||
*/
|
||||
.text
|
||||
SUPERALIGN_TEXT
|
||||
IDTVEC(invlrng)
|
||||
PUSH_FRAME
|
||||
|
||||
INTR_HANDLER invlrng
|
||||
call invlrng_handler
|
||||
jmp invltlb_ret
|
||||
|
||||
/*
|
||||
* Invalidate cache.
|
||||
*/
|
||||
.text
|
||||
SUPERALIGN_TEXT
|
||||
IDTVEC(invlcache)
|
||||
PUSH_FRAME
|
||||
|
||||
INTR_HANDLER invlcache
|
||||
call invlcache_handler
|
||||
jmp invltlb_ret
|
||||
|
||||
/*
|
||||
* Handler for IPIs sent via the per-cpu IPI bitmap.
|
||||
*/
|
||||
.text
|
||||
SUPERALIGN_TEXT
|
||||
IDTVEC(ipi_intr_bitmap_handler)
|
||||
PUSH_FRAME
|
||||
|
||||
INTR_HANDLER ipi_intr_bitmap_handler
|
||||
call as_lapic_eoi
|
||||
|
||||
FAKE_MCOUNT(TF_RIP(%rsp))
|
||||
|
||||
call ipi_bitmap_handler
|
||||
MEXITCOUNT
|
||||
jmp doreti
|
||||
@ -258,24 +222,15 @@ IDTVEC(ipi_intr_bitmap_handler)
|
||||
/*
|
||||
* Executed by a CPU when it receives an IPI_STOP from another CPU.
|
||||
*/
|
||||
.text
|
||||
SUPERALIGN_TEXT
|
||||
IDTVEC(cpustop)
|
||||
PUSH_FRAME
|
||||
|
||||
INTR_HANDLER cpustop
|
||||
call as_lapic_eoi
|
||||
|
||||
call cpustop_handler
|
||||
jmp doreti
|
||||
|
||||
/*
|
||||
* Executed by a CPU when it receives an IPI_SUSPEND from another CPU.
|
||||
*/
|
||||
.text
|
||||
SUPERALIGN_TEXT
|
||||
IDTVEC(cpususpend)
|
||||
PUSH_FRAME
|
||||
|
||||
INTR_HANDLER cpususpend
|
||||
call cpususpend_handler
|
||||
call as_lapic_eoi
|
||||
jmp doreti
|
||||
@ -285,10 +240,7 @@ IDTVEC(cpususpend)
|
||||
*
|
||||
* - Calls the generic rendezvous action function.
|
||||
*/
|
||||
.text
|
||||
SUPERALIGN_TEXT
|
||||
IDTVEC(rendezvous)
|
||||
PUSH_FRAME
|
||||
INTR_HANDLER rendezvous
|
||||
#ifdef COUNT_IPIS
|
||||
movl PCPU(CPUID), %eax
|
||||
movq ipi_rendezvous_counts(,%rax,8), %rax
|
||||
@ -328,4 +280,8 @@ IDTVEC(justreturn)
|
||||
popq %rax
|
||||
jmp doreti_iret
|
||||
|
||||
INTR_HANDLER justreturn1
|
||||
call as_lapic_eoi
|
||||
jmp doreti
|
||||
|
||||
#endif /* SMP */
|
||||
|
@ -36,38 +36,35 @@
|
||||
* master and slave interrupt controllers.
|
||||
*/
|
||||
|
||||
#include <machine/asmacros.h>
|
||||
|
||||
#include "assym.s"
|
||||
#include <machine/asmacros.h>
|
||||
|
||||
/*
|
||||
* Macros for interrupt entry, call to handler, and exit.
|
||||
*/
|
||||
#define INTR(irq_num, vec_name) \
|
||||
.text ; \
|
||||
SUPERALIGN_TEXT ; \
|
||||
IDTVEC(vec_name) ; \
|
||||
PUSH_FRAME ; \
|
||||
FAKE_MCOUNT(TF_RIP(%rsp)) ; \
|
||||
movq %rsp, %rsi ; \
|
||||
movl $irq_num, %edi; /* pass the IRQ */ \
|
||||
call atpic_handle_intr ; \
|
||||
MEXITCOUNT ; \
|
||||
.macro INTR irq_num, vec_name
|
||||
INTR_HANDLER \vec_name
|
||||
FAKE_MCOUNT(TF_RIP(%rsp))
|
||||
movq %rsp, %rsi
|
||||
movl $\irq_num, %edi /* pass the IRQ */
|
||||
call atpic_handle_intr
|
||||
MEXITCOUNT
|
||||
jmp doreti
|
||||
.endm
|
||||
|
||||
INTR(0, atpic_intr0)
|
||||
INTR(1, atpic_intr1)
|
||||
INTR(2, atpic_intr2)
|
||||
INTR(3, atpic_intr3)
|
||||
INTR(4, atpic_intr4)
|
||||
INTR(5, atpic_intr5)
|
||||
INTR(6, atpic_intr6)
|
||||
INTR(7, atpic_intr7)
|
||||
INTR(8, atpic_intr8)
|
||||
INTR(9, atpic_intr9)
|
||||
INTR(10, atpic_intr10)
|
||||
INTR(11, atpic_intr11)
|
||||
INTR(12, atpic_intr12)
|
||||
INTR(13, atpic_intr13)
|
||||
INTR(14, atpic_intr14)
|
||||
INTR(15, atpic_intr15)
|
||||
INTR 0, atpic_intr0
|
||||
INTR 1, atpic_intr1
|
||||
INTR 2, atpic_intr2
|
||||
INTR 3, atpic_intr3
|
||||
INTR 4, atpic_intr4
|
||||
INTR 5, atpic_intr5
|
||||
INTR 6, atpic_intr6
|
||||
INTR 7, atpic_intr7
|
||||
INTR 8, atpic_intr8
|
||||
INTR 9, atpic_intr9
|
||||
INTR 10, atpic_intr10
|
||||
INTR 11, atpic_intr11
|
||||
INTR 12, atpic_intr12
|
||||
INTR 13, atpic_intr13
|
||||
INTR 14, atpic_intr14
|
||||
INTR 15, atpic_intr15
|
||||
|
@ -215,8 +215,10 @@ done_tss:
|
||||
movq %r8,PCPU(RSP0)
|
||||
movq %r8,PCPU(CURPCB)
|
||||
/* Update the TSS_RSP0 pointer for the next interrupt */
|
||||
cmpb $0,pti(%rip)
|
||||
jne 1f
|
||||
movq %r8,TSS_RSP0(%rdx)
|
||||
movq %r12,PCPU(CURTHREAD) /* into next thread */
|
||||
1: movq %r12,PCPU(CURTHREAD) /* into next thread */
|
||||
|
||||
/* Test if debug registers should be restored. */
|
||||
testl $PCB_DBREGS,PCB_FLAGS(%r8)
|
||||
@ -293,7 +295,12 @@ do_tss: movq %rdx,PCPU(TSSP)
|
||||
shrq $8,%rcx
|
||||
movl %ecx,8(%rax)
|
||||
movb $0x89,5(%rax) /* unset busy */
|
||||
movl $TSSSEL,%eax
|
||||
cmpb $0,pti(%rip)
|
||||
je 1f
|
||||
movq PCPU(PRVSPACE),%rax
|
||||
addq $PC_PTI_STACK+PC_PTI_STACK_SZ*8,%rax
|
||||
movq %rax,TSS_RSP0(%rdx)
|
||||
1: movl $TSSSEL,%eax
|
||||
ltr %ax
|
||||
jmp done_tss
|
||||
|
||||
|
@ -1,12 +1,16 @@
|
||||
/*-
|
||||
* Copyright (c) 1989, 1990 William F. Jolitz.
|
||||
* Copyright (c) 1990 The Regents of the University of California.
|
||||
* Copyright (c) 2007 The FreeBSD Foundation
|
||||
* Copyright (c) 2007-2018 The FreeBSD Foundation
|
||||
* All rights reserved.
|
||||
*
|
||||
* Portions of this software were developed by A. Joseph Koshy under
|
||||
* sponsorship from the FreeBSD Foundation and Google, Inc.
|
||||
*
|
||||
* Portions of this software were developed by
|
||||
* Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
|
||||
* the FreeBSD Foundation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
@ -38,13 +42,13 @@
|
||||
#include "opt_compat.h"
|
||||
#include "opt_hwpmc_hooks.h"
|
||||
|
||||
#include "assym.s"
|
||||
|
||||
#include <machine/asmacros.h>
|
||||
#include <machine/psl.h>
|
||||
#include <machine/trap.h>
|
||||
#include <machine/specialreg.h>
|
||||
|
||||
#include "assym.s"
|
||||
|
||||
#ifdef KDTRACE_HOOKS
|
||||
.bss
|
||||
.globl dtrace_invop_jump_addr
|
||||
@ -100,68 +104,62 @@ dtrace_invop_calltrap_addr:
|
||||
MCOUNT_LABEL(user)
|
||||
MCOUNT_LABEL(btrap)
|
||||
|
||||
/* Traps that we leave interrupts disabled for.. */
|
||||
#define TRAP_NOEN(a) \
|
||||
subq $TF_RIP,%rsp; \
|
||||
movl $(a),TF_TRAPNO(%rsp) ; \
|
||||
movq $0,TF_ADDR(%rsp) ; \
|
||||
movq $0,TF_ERR(%rsp) ; \
|
||||
/* Traps that we leave interrupts disabled for. */
|
||||
.macro TRAP_NOEN l, trapno
|
||||
PTI_ENTRY \l,X\l
|
||||
.globl X\l
|
||||
.type X\l,@function
|
||||
X\l: subq $TF_RIP,%rsp
|
||||
movl $\trapno,TF_TRAPNO(%rsp)
|
||||
movq $0,TF_ADDR(%rsp)
|
||||
movq $0,TF_ERR(%rsp)
|
||||
jmp alltraps_noen
|
||||
IDTVEC(dbg)
|
||||
TRAP_NOEN(T_TRCTRAP)
|
||||
IDTVEC(bpt)
|
||||
TRAP_NOEN(T_BPTFLT)
|
||||
.endm
|
||||
|
||||
TRAP_NOEN dbg, T_TRCTRAP
|
||||
TRAP_NOEN bpt, T_BPTFLT
|
||||
#ifdef KDTRACE_HOOKS
|
||||
IDTVEC(dtrace_ret)
|
||||
TRAP_NOEN(T_DTRACE_RET)
|
||||
TRAP_NOEN dtrace_ret, T_DTRACE_RET
|
||||
#endif
|
||||
|
||||
/* Regular traps; The cpu does not supply tf_err for these. */
|
||||
#define TRAP(a) \
|
||||
subq $TF_RIP,%rsp; \
|
||||
movl $(a),TF_TRAPNO(%rsp) ; \
|
||||
movq $0,TF_ADDR(%rsp) ; \
|
||||
movq $0,TF_ERR(%rsp) ; \
|
||||
.macro TRAP l, trapno
|
||||
PTI_ENTRY \l,X\l
|
||||
.globl X\l
|
||||
.type X\l,@function
|
||||
X\l:
|
||||
subq $TF_RIP,%rsp
|
||||
movl $\trapno,TF_TRAPNO(%rsp)
|
||||
movq $0,TF_ADDR(%rsp)
|
||||
movq $0,TF_ERR(%rsp)
|
||||
jmp alltraps
|
||||
IDTVEC(div)
|
||||
TRAP(T_DIVIDE)
|
||||
IDTVEC(ofl)
|
||||
TRAP(T_OFLOW)
|
||||
IDTVEC(bnd)
|
||||
TRAP(T_BOUND)
|
||||
IDTVEC(ill)
|
||||
TRAP(T_PRIVINFLT)
|
||||
IDTVEC(dna)
|
||||
TRAP(T_DNA)
|
||||
IDTVEC(fpusegm)
|
||||
TRAP(T_FPOPFLT)
|
||||
IDTVEC(mchk)
|
||||
TRAP(T_MCHK)
|
||||
IDTVEC(rsvd)
|
||||
TRAP(T_RESERVED)
|
||||
IDTVEC(fpu)
|
||||
TRAP(T_ARITHTRAP)
|
||||
IDTVEC(xmm)
|
||||
TRAP(T_XMMFLT)
|
||||
.endm
|
||||
|
||||
/* This group of traps have tf_err already pushed by the cpu */
|
||||
#define TRAP_ERR(a) \
|
||||
subq $TF_ERR,%rsp; \
|
||||
movl $(a),TF_TRAPNO(%rsp) ; \
|
||||
movq $0,TF_ADDR(%rsp) ; \
|
||||
TRAP div, T_DIVIDE
|
||||
TRAP ofl, T_OFLOW
|
||||
TRAP bnd, T_BOUND
|
||||
TRAP ill, T_PRIVINFLT
|
||||
TRAP dna, T_DNA
|
||||
TRAP fpusegm, T_FPOPFLT
|
||||
TRAP mchk, T_MCHK
|
||||
TRAP rsvd, T_RESERVED
|
||||
TRAP fpu, T_ARITHTRAP
|
||||
TRAP xmm, T_XMMFLT
|
||||
|
||||
/* This group of traps have tf_err already pushed by the cpu. */
|
||||
.macro TRAP_ERR l, trapno
|
||||
PTI_ENTRY \l,X\l,has_err=1
|
||||
.globl X\l
|
||||
.type X\l,@function
|
||||
X\l:
|
||||
subq $TF_ERR,%rsp
|
||||
movl $\trapno,TF_TRAPNO(%rsp)
|
||||
movq $0,TF_ADDR(%rsp)
|
||||
jmp alltraps
|
||||
IDTVEC(tss)
|
||||
TRAP_ERR(T_TSSFLT)
|
||||
IDTVEC(missing)
|
||||
subq $TF_ERR,%rsp
|
||||
movl $T_SEGNPFLT,TF_TRAPNO(%rsp)
|
||||
jmp prot_addrf
|
||||
IDTVEC(stk)
|
||||
subq $TF_ERR,%rsp
|
||||
movl $T_STKFLT,TF_TRAPNO(%rsp)
|
||||
jmp prot_addrf
|
||||
IDTVEC(align)
|
||||
TRAP_ERR(T_ALIGNFLT)
|
||||
.endm
|
||||
|
||||
TRAP_ERR tss, T_TSSFLT
|
||||
TRAP_ERR align, T_ALIGNFLT
|
||||
|
||||
/*
|
||||
* alltraps entry point. Use swapgs if this is the first time in the
|
||||
@ -174,15 +172,12 @@ IDTVEC(align)
|
||||
alltraps:
|
||||
movq %rdi,TF_RDI(%rsp)
|
||||
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
|
||||
jz alltraps_testi /* already running with kernel GS.base */
|
||||
jz alltraps_segs /* already running with kernel GS.base */
|
||||
swapgs
|
||||
movq PCPU(CURPCB),%rdi
|
||||
andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
|
||||
movw %fs,TF_FS(%rsp)
|
||||
movw %gs,TF_GS(%rsp)
|
||||
movw %es,TF_ES(%rsp)
|
||||
movw %ds,TF_DS(%rsp)
|
||||
alltraps_testi:
|
||||
alltraps_segs:
|
||||
SAVE_SEGS
|
||||
testl $PSL_I,TF_RFLAGS(%rsp)
|
||||
jz alltraps_pushregs_no_rdi
|
||||
sti
|
||||
@ -249,14 +244,12 @@ calltrap:
|
||||
alltraps_noen:
|
||||
movq %rdi,TF_RDI(%rsp)
|
||||
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
|
||||
jz 1f /* already running with kernel GS.base */
|
||||
jz alltraps_noen_segs /* already running with kernel GS.base */
|
||||
swapgs
|
||||
movq PCPU(CURPCB),%rdi
|
||||
andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
|
||||
1: movw %fs,TF_FS(%rsp)
|
||||
movw %gs,TF_GS(%rsp)
|
||||
movw %es,TF_ES(%rsp)
|
||||
movw %ds,TF_DS(%rsp)
|
||||
alltraps_noen_segs:
|
||||
SAVE_SEGS
|
||||
jmp alltraps_pushregs_no_rdi
|
||||
|
||||
IDTVEC(dblfault)
|
||||
@ -279,37 +272,36 @@ IDTVEC(dblfault)
|
||||
movq %r13,TF_R13(%rsp)
|
||||
movq %r14,TF_R14(%rsp)
|
||||
movq %r15,TF_R15(%rsp)
|
||||
movw %fs,TF_FS(%rsp)
|
||||
movw %gs,TF_GS(%rsp)
|
||||
movw %es,TF_ES(%rsp)
|
||||
movw %ds,TF_DS(%rsp)
|
||||
SAVE_SEGS
|
||||
movl $TF_HASSEGS,TF_FLAGS(%rsp)
|
||||
cld
|
||||
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
|
||||
jz 1f /* already running with kernel GS.base */
|
||||
swapgs
|
||||
1:
|
||||
movq %rsp,%rdi
|
||||
movq PCPU(KCR3),%rax
|
||||
cmpq $~0,%rax
|
||||
je 2f
|
||||
movq %rax,%cr3
|
||||
2: movq %rsp,%rdi
|
||||
call dblfault_handler
|
||||
2:
|
||||
hlt
|
||||
jmp 2b
|
||||
3: hlt
|
||||
jmp 3b
|
||||
|
||||
PTI_ENTRY page, Xpage, has_err=1
|
||||
IDTVEC(page)
|
||||
subq $TF_ERR,%rsp
|
||||
movl $T_PAGEFLT,TF_TRAPNO(%rsp)
|
||||
movq %rdi,TF_RDI(%rsp) /* free up a GP register */
|
||||
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
|
||||
jz 1f /* already running with kernel GS.base */
|
||||
jz page_cr2 /* already running with kernel GS.base */
|
||||
swapgs
|
||||
movq PCPU(CURPCB),%rdi
|
||||
andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
|
||||
1: movq %cr2,%rdi /* preserve %cr2 before .. */
|
||||
page_cr2:
|
||||
movq %cr2,%rdi /* preserve %cr2 before .. */
|
||||
movq %rdi,TF_ADDR(%rsp) /* enabling interrupts. */
|
||||
movw %fs,TF_FS(%rsp)
|
||||
movw %gs,TF_GS(%rsp)
|
||||
movw %es,TF_ES(%rsp)
|
||||
movw %ds,TF_DS(%rsp)
|
||||
SAVE_SEGS
|
||||
movl $T_PAGEFLT,TF_TRAPNO(%rsp)
|
||||
testl $PSL_I,TF_RFLAGS(%rsp)
|
||||
jz alltraps_pushregs_no_rdi
|
||||
sti
|
||||
@ -320,10 +312,43 @@ IDTVEC(page)
|
||||
* the iretq stage, we'll reenter with the wrong gs state. We'll have
|
||||
* to do a special the swapgs in this case even coming from the kernel.
|
||||
* XXX linux has a trap handler for their equivalent of load_gs().
|
||||
*
|
||||
* On the stack, we have the hardware interrupt frame to return
|
||||
* to usermode (faulted) and another frame with error code, for
|
||||
* fault. For PTI, copy both frames to the main thread stack.
|
||||
*/
|
||||
IDTVEC(prot)
|
||||
.macro PROTF_ENTRY name,trapno
|
||||
\name\()_pti_doreti:
|
||||
pushq %rax
|
||||
pushq %rdx
|
||||
swapgs
|
||||
movq PCPU(KCR3),%rax
|
||||
movq %rax,%cr3
|
||||
movq PCPU(RSP0),%rax
|
||||
subq $2*PTI_SIZE-3*8,%rax
|
||||
MOVE_STACKS (PTI_SIZE / 4 - 3)
|
||||
movq %rax,%rsp
|
||||
popq %rdx
|
||||
popq %rax
|
||||
swapgs
|
||||
jmp X\name
|
||||
IDTVEC(\name\()_pti)
|
||||
cmpq $doreti_iret,PTI_RIP-2*8(%rsp)
|
||||
je \name\()_pti_doreti
|
||||
testb $SEL_RPL_MASK,PTI_CS-2*8(%rsp) /* %rax, %rdx not yet pushed */
|
||||
jz X\name
|
||||
PTI_UENTRY has_err=1
|
||||
swapgs
|
||||
IDTVEC(\name)
|
||||
subq $TF_ERR,%rsp
|
||||
movl $T_PROTFLT,TF_TRAPNO(%rsp)
|
||||
movl $\trapno,TF_TRAPNO(%rsp)
|
||||
jmp prot_addrf
|
||||
.endm
|
||||
|
||||
PROTF_ENTRY missing, T_SEGNPFLT
|
||||
PROTF_ENTRY stk, T_STKFLT
|
||||
PROTF_ENTRY prot, T_PROTFLT
|
||||
|
||||
prot_addrf:
|
||||
movq $0,TF_ADDR(%rsp)
|
||||
movq %rdi,TF_RDI(%rsp) /* free up a GP register */
|
||||
@ -375,8 +400,18 @@ prot_addrf:
|
||||
* We do not support invoking this from a custom segment registers,
|
||||
* esp. %cs, %ss, %fs, %gs, e.g. using entries from an LDT.
|
||||
*/
|
||||
SUPERALIGN_TEXT
|
||||
IDTVEC(fast_syscall_pti)
|
||||
swapgs
|
||||
movq %rax,PCPU(SCRATCH_RAX)
|
||||
movq PCPU(KCR3),%rax
|
||||
movq %rax,%cr3
|
||||
jmp fast_syscall_common
|
||||
SUPERALIGN_TEXT
|
||||
IDTVEC(fast_syscall)
|
||||
swapgs
|
||||
movq %rax,PCPU(SCRATCH_RAX)
|
||||
fast_syscall_common:
|
||||
movq %rsp,PCPU(SCRATCH_RSP)
|
||||
movq PCPU(RSP0),%rsp
|
||||
/* Now emulate a trapframe. Make the 8 byte alignment odd for call. */
|
||||
@ -386,10 +421,9 @@ IDTVEC(fast_syscall)
|
||||
movq %rcx,TF_RIP(%rsp) /* %rcx original value is in %r10 */
|
||||
movq PCPU(SCRATCH_RSP),%r11 /* %r11 already saved */
|
||||
movq %r11,TF_RSP(%rsp) /* user stack pointer */
|
||||
movw %fs,TF_FS(%rsp)
|
||||
movw %gs,TF_GS(%rsp)
|
||||
movw %es,TF_ES(%rsp)
|
||||
movw %ds,TF_DS(%rsp)
|
||||
movq PCPU(SCRATCH_RAX),%rax
|
||||
movq %rax,TF_RAX(%rsp) /* syscall number */
|
||||
SAVE_SEGS
|
||||
movq PCPU(CURPCB),%r11
|
||||
andl $~PCB_FULL_IRET,PCB_FLAGS(%r11)
|
||||
sti
|
||||
@ -402,7 +436,6 @@ IDTVEC(fast_syscall)
|
||||
movq %r10,TF_RCX(%rsp) /* arg 4 */
|
||||
movq %r8,TF_R8(%rsp) /* arg 5 */
|
||||
movq %r9,TF_R9(%rsp) /* arg 6 */
|
||||
movq %rax,TF_RAX(%rsp) /* syscall number */
|
||||
movq %rbx,TF_RBX(%rsp) /* C preserved */
|
||||
movq %rbp,TF_RBP(%rsp) /* C preserved */
|
||||
movq %r12,TF_R12(%rsp) /* C preserved */
|
||||
@ -420,11 +453,11 @@ IDTVEC(fast_syscall)
|
||||
/* Disable interrupts before testing PCB_FULL_IRET. */
|
||||
cli
|
||||
testl $PCB_FULL_IRET,PCB_FLAGS(%rax)
|
||||
jnz 3f
|
||||
jnz 4f
|
||||
/* Check for and handle AST's on return to userland. */
|
||||
movq PCPU(CURTHREAD),%rax
|
||||
testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
|
||||
jne 2f
|
||||
jne 3f
|
||||
/* Restore preserved registers. */
|
||||
MEXITCOUNT
|
||||
movq TF_RDI(%rsp),%rdi /* bonus; preserve arg 1 */
|
||||
@ -434,16 +467,21 @@ IDTVEC(fast_syscall)
|
||||
movq TF_RFLAGS(%rsp),%r11 /* original %rflags */
|
||||
movq TF_RIP(%rsp),%rcx /* original %rip */
|
||||
movq TF_RSP(%rsp),%rsp /* user stack pointer */
|
||||
swapgs
|
||||
cmpb $0,pti
|
||||
je 2f
|
||||
movq PCPU(UCR3),%r9
|
||||
movq %r9,%cr3
|
||||
xorl %r9d,%r9d
|
||||
2: swapgs
|
||||
sysretq
|
||||
|
||||
2: /* AST scheduled. */
|
||||
3: /* AST scheduled. */
|
||||
sti
|
||||
movq %rsp,%rdi
|
||||
call ast
|
||||
jmp 1b
|
||||
|
||||
3: /* Requested full context restore, use doreti for that. */
|
||||
4: /* Requested full context restore, use doreti for that. */
|
||||
MEXITCOUNT
|
||||
jmp doreti
|
||||
|
||||
@ -499,17 +537,15 @@ IDTVEC(nmi)
|
||||
movq %r13,TF_R13(%rsp)
|
||||
movq %r14,TF_R14(%rsp)
|
||||
movq %r15,TF_R15(%rsp)
|
||||
movw %fs,TF_FS(%rsp)
|
||||
movw %gs,TF_GS(%rsp)
|
||||
movw %es,TF_ES(%rsp)
|
||||
movw %ds,TF_DS(%rsp)
|
||||
SAVE_SEGS
|
||||
movl $TF_HASSEGS,TF_FLAGS(%rsp)
|
||||
cld
|
||||
xorl %ebx,%ebx
|
||||
testb $SEL_RPL_MASK,TF_CS(%rsp)
|
||||
jnz nmi_fromuserspace
|
||||
/*
|
||||
* We've interrupted the kernel. Preserve GS.base in %r12.
|
||||
* We've interrupted the kernel. Preserve GS.base in %r12
|
||||
* and %cr3 in %r13.
|
||||
*/
|
||||
movl $MSR_GSBASE,%ecx
|
||||
rdmsr
|
||||
@ -521,27 +557,38 @@ IDTVEC(nmi)
|
||||
movl %edx,%eax
|
||||
shrq $32,%rdx
|
||||
wrmsr
|
||||
movq %cr3,%r13
|
||||
movq PCPU(KCR3),%rax
|
||||
cmpq $~0,%rax
|
||||
je nmi_calltrap
|
||||
movq %rax,%cr3
|
||||
jmp nmi_calltrap
|
||||
nmi_fromuserspace:
|
||||
incl %ebx
|
||||
swapgs
|
||||
testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
|
||||
jz 2f
|
||||
movq %cr3,%r13
|
||||
movq PCPU(KCR3),%rax
|
||||
cmpq $~0,%rax
|
||||
je 1f
|
||||
movq %rax,%cr3
|
||||
movq PCPU(CURPCB),%rdi
|
||||
testq %rdi,%rdi
|
||||
jz 2f
|
||||
jz 3f
|
||||
orl $PCB_FULL_IRET,PCB_FLAGS(%rdi)
|
||||
1: testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
|
||||
jz 3f
|
||||
cmpw $KUF32SEL,TF_FS(%rsp)
|
||||
jne 1f
|
||||
jne 2f
|
||||
rdfsbase %rax
|
||||
movq %rax,PCB_FSBASE(%rdi)
|
||||
1: cmpw $KUG32SEL,TF_GS(%rsp)
|
||||
jne 2f
|
||||
2: cmpw $KUG32SEL,TF_GS(%rsp)
|
||||
jne 3f
|
||||
movl $MSR_KGSBASE,%ecx
|
||||
rdmsr
|
||||
shlq $32,%rdx
|
||||
orq %rdx,%rax
|
||||
movq %rax,PCB_GSBASE(%rdi)
|
||||
2:
|
||||
3:
|
||||
/* Note: this label is also used by ddb and gdb: */
|
||||
nmi_calltrap:
|
||||
FAKE_MCOUNT(TF_RIP(%rsp))
|
||||
@ -564,26 +611,29 @@ nmi_calltrap:
|
||||
movq PCPU(CURTHREAD),%rax
|
||||
orq %rax,%rax /* curthread present? */
|
||||
jz nocallchain
|
||||
testl $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
|
||||
jz nocallchain
|
||||
/*
|
||||
* A user callchain is to be captured, so:
|
||||
* - Move execution to the regular kernel stack, to allow for
|
||||
* nested NMI interrupts.
|
||||
* - Take the processor out of "NMI" mode by faking an "iret".
|
||||
* - Enable interrupts, so that copyin() can work.
|
||||
* Move execution to the regular kernel stack, because we
|
||||
* committed to return through doreti.
|
||||
*/
|
||||
movq %rsp,%rsi /* source stack pointer */
|
||||
movq $TF_SIZE,%rcx
|
||||
movq PCPU(RSP0),%rdx
|
||||
subq %rcx,%rdx
|
||||
movq %rdx,%rdi /* destination stack pointer */
|
||||
|
||||
shrq $3,%rcx /* trap frame size in long words */
|
||||
cld
|
||||
rep
|
||||
movsq /* copy trapframe */
|
||||
movq %rdx,%rsp /* we are on the regular kstack */
|
||||
|
||||
testl $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
|
||||
jz nocallchain
|
||||
/*
|
||||
* A user callchain is to be captured, so:
|
||||
* - Take the processor out of "NMI" mode by faking an "iret",
|
||||
* to allow for nested NMI interrupts.
|
||||
* - Enable interrupts, so that copyin() can work.
|
||||
*/
|
||||
movl %ss,%eax
|
||||
pushq %rax /* tf_ss */
|
||||
pushq %rdx /* tf_rsp (on kernel stack) */
|
||||
@ -624,22 +674,9 @@ nmi_kernelexit:
|
||||
movl %edx,%eax
|
||||
shrq $32,%rdx
|
||||
wrmsr
|
||||
movq %r13,%cr3
|
||||
nmi_restoreregs:
|
||||
movq TF_RDI(%rsp),%rdi
|
||||
movq TF_RSI(%rsp),%rsi
|
||||
movq TF_RDX(%rsp),%rdx
|
||||
movq TF_RCX(%rsp),%rcx
|
||||
movq TF_R8(%rsp),%r8
|
||||
movq TF_R9(%rsp),%r9
|
||||
movq TF_RAX(%rsp),%rax
|
||||
movq TF_RBX(%rsp),%rbx
|
||||
movq TF_RBP(%rsp),%rbp
|
||||
movq TF_R10(%rsp),%r10
|
||||
movq TF_R11(%rsp),%r11
|
||||
movq TF_R12(%rsp),%r12
|
||||
movq TF_R13(%rsp),%r13
|
||||
movq TF_R14(%rsp),%r14
|
||||
movq TF_R15(%rsp),%r15
|
||||
RESTORE_REGS
|
||||
addq $TF_RIP,%rsp
|
||||
jmp doreti_iret
|
||||
|
||||
@ -807,27 +844,38 @@ ld_es:
|
||||
ld_ds:
|
||||
movw TF_DS(%rsp),%ds
|
||||
ld_regs:
|
||||
movq TF_RDI(%rsp),%rdi
|
||||
movq TF_RSI(%rsp),%rsi
|
||||
movq TF_RDX(%rsp),%rdx
|
||||
movq TF_RCX(%rsp),%rcx
|
||||
movq TF_R8(%rsp),%r8
|
||||
movq TF_R9(%rsp),%r9
|
||||
movq TF_RAX(%rsp),%rax
|
||||
movq TF_RBX(%rsp),%rbx
|
||||
movq TF_RBP(%rsp),%rbp
|
||||
movq TF_R10(%rsp),%r10
|
||||
movq TF_R11(%rsp),%r11
|
||||
movq TF_R12(%rsp),%r12
|
||||
movq TF_R13(%rsp),%r13
|
||||
movq TF_R14(%rsp),%r14
|
||||
movq TF_R15(%rsp),%r15
|
||||
RESTORE_REGS
|
||||
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
|
||||
jz 1f /* keep running with kernel GS.base */
|
||||
jz 2f /* keep running with kernel GS.base */
|
||||
cli
|
||||
cmpb $0,pti
|
||||
je 1f
|
||||
pushq %rdx
|
||||
movq PCPU(PRVSPACE),%rdx
|
||||
addq $PC_PTI_STACK+PC_PTI_STACK_SZ*8-PTI_SIZE,%rdx
|
||||
movq %rax,PTI_RAX(%rdx)
|
||||
popq %rax
|
||||
movq %rax,PTI_RDX(%rdx)
|
||||
movq TF_RIP(%rsp),%rax
|
||||
movq %rax,PTI_RIP(%rdx)
|
||||
movq TF_CS(%rsp),%rax
|
||||
movq %rax,PTI_CS(%rdx)
|
||||
movq TF_RFLAGS(%rsp),%rax
|
||||
movq %rax,PTI_RFLAGS(%rdx)
|
||||
movq TF_RSP(%rsp),%rax
|
||||
movq %rax,PTI_RSP(%rdx)
|
||||
movq TF_SS(%rsp),%rax
|
||||
movq %rax,PTI_SS(%rdx)
|
||||
movq PCPU(UCR3),%rax
|
||||
swapgs
|
||||
1:
|
||||
addq $TF_RIP,%rsp /* skip over tf_err, tf_trapno */
|
||||
movq %rdx,%rsp
|
||||
movq %rax,%cr3
|
||||
popq %rdx
|
||||
popq %rax
|
||||
addq $8,%rsp
|
||||
jmp doreti_iret
|
||||
1: swapgs
|
||||
2: addq $TF_RIP,%rsp
|
||||
.globl doreti_iret
|
||||
doreti_iret:
|
||||
iretq
|
||||
@ -851,14 +899,11 @@ set_segs:
|
||||
.globl doreti_iret_fault
|
||||
doreti_iret_fault:
|
||||
subq $TF_RIP,%rsp /* space including tf_err, tf_trapno */
|
||||
testl $PSL_I,TF_RFLAGS(%rsp)
|
||||
testb $SEL_RPL_MASK,TF_CS(%rsp)
|
||||
jz 1f
|
||||
sti
|
||||
1:
|
||||
movw %fs,TF_FS(%rsp)
|
||||
movw %gs,TF_GS(%rsp)
|
||||
movw %es,TF_ES(%rsp)
|
||||
movw %ds,TF_DS(%rsp)
|
||||
SAVE_SEGS
|
||||
movl $TF_HASSEGS,TF_FLAGS(%rsp)
|
||||
movq %rdi,TF_RDI(%rsp)
|
||||
movq %rsi,TF_RSI(%rsp)
|
||||
@ -885,7 +930,7 @@ doreti_iret_fault:
|
||||
.globl ds_load_fault
|
||||
ds_load_fault:
|
||||
movl $T_PROTFLT,TF_TRAPNO(%rsp)
|
||||
testl $PSL_I,TF_RFLAGS(%rsp)
|
||||
testb $SEL_RPL_MASK,TF_CS(%rsp)
|
||||
jz 1f
|
||||
sti
|
||||
1:
|
||||
|
@ -186,6 +186,16 @@ ASSYM(TF_FLAGS, offsetof(struct trapframe, tf_flags));
|
||||
ASSYM(TF_SIZE, sizeof(struct trapframe));
|
||||
ASSYM(TF_HASSEGS, TF_HASSEGS);
|
||||
|
||||
ASSYM(PTI_RDX, offsetof(struct pti_frame, pti_rdx));
|
||||
ASSYM(PTI_RAX, offsetof(struct pti_frame, pti_rax));
|
||||
ASSYM(PTI_ERR, offsetof(struct pti_frame, pti_err));
|
||||
ASSYM(PTI_RIP, offsetof(struct pti_frame, pti_rip));
|
||||
ASSYM(PTI_CS, offsetof(struct pti_frame, pti_cs));
|
||||
ASSYM(PTI_RFLAGS, offsetof(struct pti_frame, pti_rflags));
|
||||
ASSYM(PTI_RSP, offsetof(struct pti_frame, pti_rsp));
|
||||
ASSYM(PTI_SS, offsetof(struct pti_frame, pti_ss));
|
||||
ASSYM(PTI_SIZE, sizeof(struct pti_frame));
|
||||
|
||||
ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler));
|
||||
ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc));
|
||||
ASSYM(UC_EFLAGS, offsetof(ucontext_t, uc_mcontext.mc_rflags));
|
||||
@ -202,6 +212,7 @@ ASSYM(PC_IDLETHREAD, offsetof(struct pcpu, pc_idlethread));
|
||||
ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb));
|
||||
ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid));
|
||||
ASSYM(PC_SCRATCH_RSP, offsetof(struct pcpu, pc_scratch_rsp));
|
||||
ASSYM(PC_SCRATCH_RAX, offsetof(struct pcpu, pc_scratch_rax));
|
||||
ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap));
|
||||
ASSYM(PC_TSSP, offsetof(struct pcpu, pc_tssp));
|
||||
ASSYM(PC_RSP0, offsetof(struct pcpu, pc_rsp0));
|
||||
@ -211,6 +222,10 @@ ASSYM(PC_LDT, offsetof(struct pcpu, pc_ldt));
|
||||
ASSYM(PC_COMMONTSSP, offsetof(struct pcpu, pc_commontssp));
|
||||
ASSYM(PC_TSS, offsetof(struct pcpu, pc_tss));
|
||||
ASSYM(PC_PM_SAVE_CNT, offsetof(struct pcpu, pc_pm_save_cnt));
|
||||
ASSYM(PC_KCR3, offsetof(struct pcpu, pc_kcr3));
|
||||
ASSYM(PC_UCR3, offsetof(struct pcpu, pc_ucr3));
|
||||
ASSYM(PC_PTI_STACK, offsetof(struct pcpu, pc_pti_stack));
|
||||
ASSYM(PC_PTI_STACK_SZ, PC_PTI_STACK_SZ);
|
||||
|
||||
ASSYM(LA_EOI, LAPIC_EOI * LAPIC_MEM_MUL);
|
||||
ASSYM(LA_ISR, LAPIC_ISR0 * LAPIC_MEM_MUL);
|
||||
|
@ -813,13 +813,20 @@ extern inthand_t
|
||||
IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
|
||||
IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
|
||||
IDTVEC(xmm), IDTVEC(dblfault),
|
||||
IDTVEC(div_pti), IDTVEC(dbg_pti), IDTVEC(bpt_pti),
|
||||
IDTVEC(ofl_pti), IDTVEC(bnd_pti), IDTVEC(ill_pti), IDTVEC(dna_pti),
|
||||
IDTVEC(fpusegm_pti), IDTVEC(tss_pti), IDTVEC(missing_pti),
|
||||
IDTVEC(stk_pti), IDTVEC(prot_pti), IDTVEC(page_pti), IDTVEC(mchk_pti),
|
||||
IDTVEC(rsvd_pti), IDTVEC(fpu_pti), IDTVEC(align_pti),
|
||||
IDTVEC(xmm_pti),
|
||||
#ifdef KDTRACE_HOOKS
|
||||
IDTVEC(dtrace_ret),
|
||||
IDTVEC(dtrace_ret), IDTVEC(dtrace_ret_pti),
|
||||
#endif
|
||||
#ifdef XENHVM
|
||||
IDTVEC(xen_intr_upcall),
|
||||
IDTVEC(xen_intr_upcall), IDTVEC(xen_intr_upcall_pti),
|
||||
#endif
|
||||
IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
|
||||
IDTVEC(fast_syscall), IDTVEC(fast_syscall32),
|
||||
IDTVEC(fast_syscall_pti);
|
||||
|
||||
#ifdef DDB
|
||||
/*
|
||||
@ -1520,7 +1527,8 @@ amd64_conf_fast_syscall(void)
|
||||
|
||||
msr = rdmsr(MSR_EFER) | EFER_SCE;
|
||||
wrmsr(MSR_EFER, msr);
|
||||
wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall));
|
||||
wrmsr(MSR_LSTAR, pti ? (u_int64_t)IDTVEC(fast_syscall_pti) :
|
||||
(u_int64_t)IDTVEC(fast_syscall));
|
||||
wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
|
||||
msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
|
||||
((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
|
||||
@ -1536,6 +1544,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
|
||||
struct pcpu *pc;
|
||||
struct nmi_pcpu *np;
|
||||
struct xstate_hdr *xhdr;
|
||||
u_int64_t rsp0;
|
||||
char *env;
|
||||
size_t kstack0_sz;
|
||||
int late_console;
|
||||
@ -1609,34 +1618,54 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
|
||||
mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF);
|
||||
|
||||
/* exceptions */
|
||||
TUNABLE_INT_FETCH("vm.pmap.pti", &pti);
|
||||
|
||||
for (x = 0; x < NIDT; x++)
|
||||
setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(x, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_SYSIGT,
|
||||
SEL_KPL, 0);
|
||||
setidt(IDT_DE, pti ? &IDTVEC(div_pti) : &IDTVEC(div), SDT_SYSIGT,
|
||||
SEL_KPL, 0);
|
||||
setidt(IDT_DB, pti ? &IDTVEC(dbg_pti) : &IDTVEC(dbg), SDT_SYSIGT,
|
||||
SEL_KPL, 0);
|
||||
setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2);
|
||||
setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0);
|
||||
setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IDT_BR, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IDT_UD, &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IDT_NM, &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IDT_BP, pti ? &IDTVEC(bpt_pti) : &IDTVEC(bpt), SDT_SYSIGT,
|
||||
SEL_UPL, 0);
|
||||
setidt(IDT_OF, pti ? &IDTVEC(ofl_pti) : &IDTVEC(ofl), SDT_SYSIGT,
|
||||
SEL_KPL, 0);
|
||||
setidt(IDT_BR, pti ? &IDTVEC(bnd_pti) : &IDTVEC(bnd), SDT_SYSIGT,
|
||||
SEL_KPL, 0);
|
||||
setidt(IDT_UD, pti ? &IDTVEC(ill_pti) : &IDTVEC(ill), SDT_SYSIGT,
|
||||
SEL_KPL, 0);
|
||||
setidt(IDT_NM, pti ? &IDTVEC(dna_pti) : &IDTVEC(dna), SDT_SYSIGT,
|
||||
SEL_KPL, 0);
|
||||
setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1);
|
||||
setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IDT_TS, &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IDT_NP, &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IDT_SS, &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IDT_GP, &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IDT_PF, &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IDT_MF, &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IDT_AC, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IDT_XF, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IDT_FPUGP, pti ? &IDTVEC(fpusegm_pti) : &IDTVEC(fpusegm),
|
||||
SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IDT_TS, pti ? &IDTVEC(tss_pti) : &IDTVEC(tss), SDT_SYSIGT,
|
||||
SEL_KPL, 0);
|
||||
setidt(IDT_NP, pti ? &IDTVEC(missing_pti) : &IDTVEC(missing),
|
||||
SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IDT_SS, pti ? &IDTVEC(stk_pti) : &IDTVEC(stk), SDT_SYSIGT,
|
||||
SEL_KPL, 0);
|
||||
setidt(IDT_GP, pti ? &IDTVEC(prot_pti) : &IDTVEC(prot), SDT_SYSIGT,
|
||||
SEL_KPL, 0);
|
||||
setidt(IDT_PF, pti ? &IDTVEC(page_pti) : &IDTVEC(page), SDT_SYSIGT,
|
||||
SEL_KPL, 0);
|
||||
setidt(IDT_MF, pti ? &IDTVEC(fpu_pti) : &IDTVEC(fpu), SDT_SYSIGT,
|
||||
SEL_KPL, 0);
|
||||
setidt(IDT_AC, pti ? &IDTVEC(align_pti) : &IDTVEC(align), SDT_SYSIGT,
|
||||
SEL_KPL, 0);
|
||||
setidt(IDT_MC, pti ? &IDTVEC(mchk_pti) : &IDTVEC(mchk), SDT_SYSIGT,
|
||||
SEL_KPL, 0);
|
||||
setidt(IDT_XF, pti ? &IDTVEC(xmm_pti) : &IDTVEC(xmm), SDT_SYSIGT,
|
||||
SEL_KPL, 0);
|
||||
#ifdef KDTRACE_HOOKS
|
||||
setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0);
|
||||
setidt(IDT_DTRACE_RET, pti ? &IDTVEC(dtrace_ret_pti) :
|
||||
&IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0);
|
||||
#endif
|
||||
#ifdef XENHVM
|
||||
setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_UPL, 0);
|
||||
#endif
|
||||
|
||||
r_idt.rd_limit = sizeof(idt0) - 1;
|
||||
r_idt.rd_base = (long) idt;
|
||||
lidt(&r_idt);
|
||||
@ -1750,10 +1779,12 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
|
||||
xhdr->xstate_bv = xsave_mask;
|
||||
}
|
||||
/* make an initial tss so cpu can get interrupt stack on syscall! */
|
||||
common_tss[0].tss_rsp0 = (vm_offset_t)thread0.td_pcb;
|
||||
rsp0 = (vm_offset_t)thread0.td_pcb;
|
||||
/* Ensure the stack is aligned to 16 bytes */
|
||||
common_tss[0].tss_rsp0 &= ~0xFul;
|
||||
PCPU_SET(rsp0, common_tss[0].tss_rsp0);
|
||||
rsp0 &= ~0xFul;
|
||||
common_tss[0].tss_rsp0 = pti ? ((vm_offset_t)PCPU_PTR(pti_stack) +
|
||||
PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful : rsp0;
|
||||
PCPU_SET(rsp0, rsp0);
|
||||
PCPU_SET(curpcb, thread0.td_pcb);
|
||||
|
||||
/* transfer to user mode */
|
||||
|
@ -132,33 +132,40 @@ cpu_mp_start(void)
|
||||
/* Install an inter-CPU IPI for TLB invalidation */
|
||||
if (pmap_pcid_enabled) {
|
||||
if (invpcid_works) {
|
||||
setidt(IPI_INVLTLB, IDTVEC(invltlb_invpcid),
|
||||
SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_invpcid_pti) :
|
||||
IDTVEC(invltlb_invpcid), SDT_SYSIGT, SEL_KPL, 0);
|
||||
} else {
|
||||
setidt(IPI_INVLTLB, IDTVEC(invltlb_pcid), SDT_SYSIGT,
|
||||
SEL_KPL, 0);
|
||||
setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pcid_pti) :
|
||||
IDTVEC(invltlb_pcid), SDT_SYSIGT, SEL_KPL, 0);
|
||||
}
|
||||
} else {
|
||||
setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pti) : IDTVEC(invltlb),
|
||||
SDT_SYSIGT, SEL_KPL, 0);
|
||||
}
|
||||
setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pti) : IDTVEC(invlpg),
|
||||
SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pti) : IDTVEC(invlrng),
|
||||
SDT_SYSIGT, SEL_KPL, 0);
|
||||
|
||||
/* Install an inter-CPU IPI for cache invalidation. */
|
||||
setidt(IPI_INVLCACHE, IDTVEC(invlcache), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IPI_INVLCACHE, pti ? IDTVEC(invlcache_pti) : IDTVEC(invlcache),
|
||||
SDT_SYSIGT, SEL_KPL, 0);
|
||||
|
||||
/* Install an inter-CPU IPI for all-CPU rendezvous */
|
||||
setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IPI_RENDEZVOUS, pti ? IDTVEC(rendezvous_pti) :
|
||||
IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0);
|
||||
|
||||
/* Install generic inter-CPU IPI handler */
|
||||
setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler),
|
||||
SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IPI_BITMAP_VECTOR, pti ? IDTVEC(ipi_intr_bitmap_handler_pti) :
|
||||
IDTVEC(ipi_intr_bitmap_handler), SDT_SYSIGT, SEL_KPL, 0);
|
||||
|
||||
/* Install an inter-CPU IPI for CPU stop/restart */
|
||||
setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IPI_STOP, pti ? IDTVEC(cpustop_pti) : IDTVEC(cpustop),
|
||||
SDT_SYSIGT, SEL_KPL, 0);
|
||||
|
||||
/* Install an inter-CPU IPI for CPU suspend/resume */
|
||||
setidt(IPI_SUSPEND, IDTVEC(cpususpend), SDT_SYSIGT, SEL_KPL, 0);
|
||||
setidt(IPI_SUSPEND, pti ? IDTVEC(cpususpend_pti) : IDTVEC(cpususpend),
|
||||
SDT_SYSIGT, SEL_KPL, 0);
|
||||
|
||||
/* Set boot_cpu_id if needed. */
|
||||
if (boot_cpu_id == -1) {
|
||||
@ -197,7 +204,6 @@ init_secondary(void)
|
||||
|
||||
/* Init tss */
|
||||
common_tss[cpu] = common_tss[0];
|
||||
common_tss[cpu].tss_rsp0 = 0; /* not used until after switch */
|
||||
common_tss[cpu].tss_iobase = sizeof(struct amd64tss) +
|
||||
IOPERM_BITMAP_SIZE;
|
||||
common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE];
|
||||
@ -240,6 +246,8 @@ init_secondary(void)
|
||||
pc->pc_curpmap = kernel_pmap;
|
||||
pc->pc_pcid_gen = 1;
|
||||
pc->pc_pcid_next = PMAP_PCID_KERN + 1;
|
||||
common_tss[cpu].tss_rsp0 = pti ? ((vm_offset_t)&pc->pc_pti_stack +
|
||||
PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful : 0;
|
||||
|
||||
/* Save the per-cpu pointer for use by the NMI handler. */
|
||||
np->np_pcpu = (register_t) pc;
|
||||
|
@ -11,11 +11,17 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2018 The FreeBSD Foundation
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* the Systems Programming Group of the University of Utah Computer
|
||||
* Science Department and William Jolitz of UUNET Technologies Inc.
|
||||
*
|
||||
* Portions of this software were developed by
|
||||
* Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
|
||||
* the FreeBSD Foundation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
@ -149,6 +155,7 @@ __FBSDID("$FreeBSD$");
|
||||
#ifdef SMP
|
||||
#include <machine/smp.h>
|
||||
#endif
|
||||
#include <machine/tss.h>
|
||||
|
||||
static __inline boolean_t
|
||||
pmap_type_guest(pmap_t pmap)
|
||||
@ -210,6 +217,8 @@ pmap_rw_bit(pmap_t pmap)
|
||||
return (mask);
|
||||
}
|
||||
|
||||
static pt_entry_t pg_g;
|
||||
|
||||
static __inline pt_entry_t
|
||||
pmap_global_bit(pmap_t pmap)
|
||||
{
|
||||
@ -217,7 +226,7 @@ pmap_global_bit(pmap_t pmap)
|
||||
|
||||
switch (pmap->pm_type) {
|
||||
case PT_X86:
|
||||
mask = X86_PG_G;
|
||||
mask = pg_g;
|
||||
break;
|
||||
case PT_RVI:
|
||||
case PT_EPT:
|
||||
@ -405,6 +414,15 @@ int invpcid_works = 0;
|
||||
SYSCTL_INT(_vm_pmap, OID_AUTO, invpcid_works, CTLFLAG_RD, &invpcid_works, 0,
|
||||
"Is the invpcid instruction available ?");
|
||||
|
||||
int pti = 0;
|
||||
SYSCTL_INT(_vm_pmap, OID_AUTO, pti, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
|
||||
&pti, 0,
|
||||
"Page Table Isolation enabled");
|
||||
static vm_object_t pti_obj;
|
||||
static pml4_entry_t *pti_pml4;
|
||||
static vm_pindex_t pti_pg_idx;
|
||||
static bool pti_finalized;
|
||||
|
||||
static int
|
||||
pmap_pcid_save_cnt_proc(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
@ -639,6 +657,11 @@ static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
|
||||
static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
|
||||
vm_prot_t prot);
|
||||
static void pmap_pte_attr(pt_entry_t *pte, int cache_bits, int mask);
|
||||
static void pmap_pti_add_kva_locked(vm_offset_t sva, vm_offset_t eva,
|
||||
bool exec);
|
||||
static pdp_entry_t *pmap_pti_pdpe(vm_offset_t va);
|
||||
static pd_entry_t *pmap_pti_pde(vm_offset_t va);
|
||||
static void pmap_pti_wire_pte(void *pte);
|
||||
static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
|
||||
struct spglist *free, struct rwlock **lockp);
|
||||
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
|
||||
@ -921,7 +944,7 @@ create_pagetables(vm_paddr_t *firstaddr)
|
||||
/* XXX not fully used, underneath 2M pages */
|
||||
pt_p = (pt_entry_t *)KPTphys;
|
||||
for (i = 0; ptoa(i) < *firstaddr; i++)
|
||||
pt_p[i] = ptoa(i) | X86_PG_RW | X86_PG_V | X86_PG_G;
|
||||
pt_p[i] = ptoa(i) | X86_PG_RW | X86_PG_V | pg_g;
|
||||
|
||||
/* Now map the page tables at their location within PTmap */
|
||||
pd_p = (pd_entry_t *)KPDphys;
|
||||
@ -932,7 +955,7 @@ create_pagetables(vm_paddr_t *firstaddr)
|
||||
/* This replaces some of the KPTphys entries above */
|
||||
for (i = 0; (i << PDRSHIFT) < *firstaddr; i++)
|
||||
pd_p[i] = (i << PDRSHIFT) | X86_PG_RW | X86_PG_V | PG_PS |
|
||||
X86_PG_G;
|
||||
pg_g;
|
||||
|
||||
/* And connect up the PD to the PDP (leaving room for L4 pages) */
|
||||
pdp_p = (pdp_entry_t *)(KPDPphys + ptoa(KPML4I - KPML4BASE));
|
||||
@ -952,14 +975,14 @@ create_pagetables(vm_paddr_t *firstaddr)
|
||||
for (i = NPDEPG * ndm1g, j = 0; i < NPDEPG * ndmpdp; i++, j++) {
|
||||
pd_p[j] = (vm_paddr_t)i << PDRSHIFT;
|
||||
/* Preset PG_M and PG_A because demotion expects it. */
|
||||
pd_p[j] |= X86_PG_RW | X86_PG_V | PG_PS | X86_PG_G |
|
||||
pd_p[j] |= X86_PG_RW | X86_PG_V | PG_PS | pg_g |
|
||||
X86_PG_M | X86_PG_A | pg_nx;
|
||||
}
|
||||
pdp_p = (pdp_entry_t *)DMPDPphys;
|
||||
for (i = 0; i < ndm1g; i++) {
|
||||
pdp_p[i] = (vm_paddr_t)i << PDPSHIFT;
|
||||
/* Preset PG_M and PG_A because demotion expects it. */
|
||||
pdp_p[i] |= X86_PG_RW | X86_PG_V | PG_PS | X86_PG_G |
|
||||
pdp_p[i] |= X86_PG_RW | X86_PG_V | PG_PS | pg_g |
|
||||
X86_PG_M | X86_PG_A | pg_nx;
|
||||
}
|
||||
for (j = 0; i < ndmpdp; i++, j++) {
|
||||
@ -1002,6 +1025,9 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
|
||||
pt_entry_t *pte;
|
||||
int i;
|
||||
|
||||
if (!pti)
|
||||
pg_g = X86_PG_G;
|
||||
|
||||
/*
|
||||
* Create an initial set of page tables to run the kernel in.
|
||||
*/
|
||||
@ -1071,6 +1097,8 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
|
||||
pmap_init_pat();
|
||||
|
||||
/* Initialize TLB Context Id. */
|
||||
if (pti)
|
||||
pmap_pcid_enabled = 0;
|
||||
TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
|
||||
if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
|
||||
/* Check for INVPCID support */
|
||||
@ -2114,7 +2142,7 @@ pmap_kenter(vm_offset_t va, vm_paddr_t pa)
|
||||
pt_entry_t *pte;
|
||||
|
||||
pte = vtopte(va);
|
||||
pte_store(pte, pa | X86_PG_RW | X86_PG_V | X86_PG_G);
|
||||
pte_store(pte, pa | X86_PG_RW | X86_PG_V | pg_g);
|
||||
}
|
||||
|
||||
static __inline void
|
||||
@ -2125,7 +2153,7 @@ pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
|
||||
|
||||
pte = vtopte(va);
|
||||
cache_bits = pmap_cache_bits(kernel_pmap, mode, 0);
|
||||
pte_store(pte, pa | X86_PG_RW | X86_PG_V | X86_PG_G | cache_bits);
|
||||
pte_store(pte, pa | X86_PG_RW | X86_PG_V | pg_g | cache_bits);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2185,7 +2213,7 @@ pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
|
||||
pa = VM_PAGE_TO_PHYS(m) | cache_bits;
|
||||
if ((*pte & (PG_FRAME | X86_PG_PTE_CACHE)) != pa) {
|
||||
oldpte |= *pte;
|
||||
pte_store(pte, pa | X86_PG_G | X86_PG_RW | X86_PG_V);
|
||||
pte_store(pte, pa | pg_g | X86_PG_RW | X86_PG_V);
|
||||
}
|
||||
pte++;
|
||||
}
|
||||
@ -2306,6 +2334,10 @@ _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
|
||||
pml4_entry_t *pml4;
|
||||
pml4 = pmap_pml4e(pmap, va);
|
||||
*pml4 = 0;
|
||||
if (pmap->pm_pml4u != NULL && va <= VM_MAXUSER_ADDRESS) {
|
||||
pml4 = &pmap->pm_pml4u[pmap_pml4e_index(va)];
|
||||
*pml4 = 0;
|
||||
}
|
||||
} else if (m->pindex >= NUPDE) {
|
||||
/* PD page */
|
||||
pdp_entry_t *pdp;
|
||||
@ -2364,7 +2396,9 @@ pmap_pinit0(pmap_t pmap)
|
||||
|
||||
PMAP_LOCK_INIT(pmap);
|
||||
pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys);
|
||||
pmap->pm_pml4u = NULL;
|
||||
pmap->pm_cr3 = KPML4phys;
|
||||
pmap->pm_ucr3 = ~0UL;
|
||||
pmap->pm_root.rt_root = 0;
|
||||
CPU_ZERO(&pmap->pm_active);
|
||||
TAILQ_INIT(&pmap->pm_pvchunk);
|
||||
@ -2373,6 +2407,8 @@ pmap_pinit0(pmap_t pmap)
|
||||
CPU_FOREACH(i) {
|
||||
pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE;
|
||||
pmap->pm_pcids[i].pm_gen = 0;
|
||||
if (!pti)
|
||||
__pcpu[i].pc_kcr3 = ~0ul;
|
||||
}
|
||||
PCPU_SET(curpmap, kernel_pmap);
|
||||
pmap_activate(curthread);
|
||||
@ -2402,6 +2438,17 @@ pmap_pinit_pml4(vm_page_t pml4pg)
|
||||
X86_PG_A | X86_PG_M;
|
||||
}
|
||||
|
||||
static void
|
||||
pmap_pinit_pml4_pti(vm_page_t pml4pg)
|
||||
{
|
||||
pml4_entry_t *pm_pml4;
|
||||
int i;
|
||||
|
||||
pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pg));
|
||||
for (i = 0; i < NPML4EPG; i++)
|
||||
pm_pml4[i] = pti_pml4[i];
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize a preallocated and zeroed pmap structure,
|
||||
* such as one in a vmspace structure.
|
||||
@ -2409,7 +2456,7 @@ pmap_pinit_pml4(vm_page_t pml4pg)
|
||||
int
|
||||
pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
|
||||
{
|
||||
vm_page_t pml4pg;
|
||||
vm_page_t pml4pg, pml4pgu;
|
||||
vm_paddr_t pml4phys;
|
||||
int i;
|
||||
|
||||
@ -2425,8 +2472,10 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
|
||||
pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE;
|
||||
pmap->pm_pcids[i].pm_gen = 0;
|
||||
}
|
||||
pmap->pm_cr3 = ~0; /* initialize to an invalid value */
|
||||
pmap->pm_cr3 = ~0l; /* initialize to an invalid value */
|
||||
pmap->pm_pml4u = NULL;
|
||||
|
||||
pmap->pm_type = pm_type;
|
||||
if ((pml4pg->flags & PG_ZERO) == 0)
|
||||
pagezero(pmap->pm_pml4);
|
||||
|
||||
@ -2434,10 +2483,19 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
|
||||
* Do not install the host kernel mappings in the nested page
|
||||
* tables. These mappings are meaningless in the guest physical
|
||||
* address space.
|
||||
* Install minimal kernel mappings in PTI case.
|
||||
*/
|
||||
if ((pmap->pm_type = pm_type) == PT_X86) {
|
||||
if (pm_type == PT_X86) {
|
||||
pmap->pm_cr3 = pml4phys;
|
||||
pmap_pinit_pml4(pml4pg);
|
||||
if (pti) {
|
||||
pml4pgu = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
|
||||
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_WAITOK);
|
||||
pmap->pm_pml4u = (pml4_entry_t *)PHYS_TO_DMAP(
|
||||
VM_PAGE_TO_PHYS(pml4pgu));
|
||||
pmap_pinit_pml4_pti(pml4pgu);
|
||||
pmap->pm_ucr3 = VM_PAGE_TO_PHYS(pml4pgu);
|
||||
}
|
||||
}
|
||||
|
||||
pmap->pm_root.rt_root = 0;
|
||||
@ -2509,13 +2567,18 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
|
||||
*/
|
||||
|
||||
if (ptepindex >= (NUPDE + NUPDPE)) {
|
||||
pml4_entry_t *pml4;
|
||||
pml4_entry_t *pml4, *pml4u;
|
||||
vm_pindex_t pml4index;
|
||||
|
||||
/* Wire up a new PDPE page */
|
||||
pml4index = ptepindex - (NUPDE + NUPDPE);
|
||||
pml4 = &pmap->pm_pml4[pml4index];
|
||||
*pml4 = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
|
||||
if (pmap->pm_pml4u != NULL && pml4index < NUPML4E) {
|
||||
pml4u = &pmap->pm_pml4u[pml4index];
|
||||
*pml4u = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V |
|
||||
PG_A | PG_M;
|
||||
}
|
||||
|
||||
} else if (ptepindex >= NUPDE) {
|
||||
vm_pindex_t pml4index;
|
||||
@ -2716,6 +2779,13 @@ pmap_release(pmap_t pmap)
|
||||
m->wire_count--;
|
||||
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
|
||||
vm_page_free_zero(m);
|
||||
|
||||
if (pmap->pm_pml4u != NULL) {
|
||||
m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4u));
|
||||
m->wire_count--;
|
||||
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
|
||||
vm_page_free(m);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
@ -7141,6 +7211,10 @@ pmap_activate_sw(struct thread *td)
|
||||
} else if (cr3 != pmap->pm_cr3) {
|
||||
load_cr3(pmap->pm_cr3);
|
||||
PCPU_SET(curpmap, pmap);
|
||||
if (pti) {
|
||||
PCPU_SET(kcr3, pmap->pm_cr3);
|
||||
PCPU_SET(ucr3, pmap->pm_ucr3);
|
||||
}
|
||||
}
|
||||
#ifdef SMP
|
||||
CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
|
||||
@ -7464,6 +7538,291 @@ pmap_quick_remove_page(vm_offset_t addr)
|
||||
mtx_unlock_spin(&qframe_mtx);
|
||||
}
|
||||
|
||||
static vm_page_t
|
||||
pmap_pti_alloc_page(void)
|
||||
{
|
||||
vm_page_t m;
|
||||
|
||||
VM_OBJECT_ASSERT_WLOCKED(pti_obj);
|
||||
m = vm_page_grab(pti_obj, pti_pg_idx++, VM_ALLOC_NOBUSY |
|
||||
VM_ALLOC_WIRED | VM_ALLOC_ZERO);
|
||||
return (m);
|
||||
}
|
||||
|
||||
static bool
|
||||
pmap_pti_free_page(vm_page_t m, bool last)
|
||||
{
|
||||
|
||||
m->wire_count--;
|
||||
if (m->wire_count == 0 || last) {
|
||||
KASSERT(m->wire_count == 0, ("page %p wired", m));
|
||||
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
|
||||
vm_page_free_zero(m);
|
||||
return (true);
|
||||
}
|
||||
return (false);
|
||||
}
|
||||
|
||||
extern char kernphys[], etext[];

static void
pmap_pti_init(void)
{
        vm_page_t pml4_pg;
        pdp_entry_t *pdpe;
        vm_offset_t va;
        int i;

        if (!pti)
                return;
        pti_obj = vm_pager_allocate(OBJT_PHYS, NULL, 0, VM_PROT_ALL, 0, NULL);
        VM_OBJECT_WLOCK(pti_obj);
        pml4_pg = pmap_pti_alloc_page();
        pti_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4_pg));
        for (va = VM_MIN_KERNEL_ADDRESS; va <= VM_MAX_KERNEL_ADDRESS &&
            va >= VM_MIN_KERNEL_ADDRESS && va > NBPML4; va += NBPML4) {
                pdpe = pmap_pti_pdpe(va);
                pmap_pti_wire_pte(pdpe);
        }
        pmap_pti_add_kva_locked((vm_offset_t)&__pcpu[0],
            (vm_offset_t)&__pcpu[0] + sizeof(__pcpu[0]) * MAXCPU, false);
        pmap_pti_add_kva_locked((vm_offset_t)gdt, (vm_offset_t)gdt +
            sizeof(struct user_segment_descriptor) * NGDT * MAXCPU, false);
        pmap_pti_add_kva_locked((vm_offset_t)idt, (vm_offset_t)idt +
            sizeof(struct gate_descriptor) * NIDT, false);
        pmap_pti_add_kva_locked((vm_offset_t)common_tss,
            (vm_offset_t)common_tss + sizeof(struct amd64tss) * MAXCPU, false);
        CPU_FOREACH(i) {
                /* Doublefault stack IST 1 */
                va = common_tss[i].tss_ist1;
                pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false);
                /* NMI stack IST 2 */
                va = common_tss[i].tss_ist2 + sizeof(struct nmi_pcpu);
                pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false);
        }
        pmap_pti_add_kva_locked((vm_offset_t)kernphys + KERNBASE,
            (vm_offset_t)etext, true);
        pti_finalized = true;
        VM_OBJECT_WUNLOCK(pti_obj);
}
SYSINIT(pmap_pti, SI_SUB_CPU + 1, SI_ORDER_ANY, pmap_pti_init, NULL);

static pdp_entry_t *
pmap_pti_pdpe(vm_offset_t va)
{
        pml4_entry_t *pml4e;
        pdp_entry_t *pdpe;
        vm_page_t m;
        vm_pindex_t pml4_idx;
        vm_paddr_t mphys;

        VM_OBJECT_ASSERT_WLOCKED(pti_obj);

        pml4_idx = pmap_pml4e_index(va);
        pml4e = &pti_pml4[pml4_idx];
        m = NULL;
        if (*pml4e == 0) {
                if (pti_finalized)
                        panic("pml4 alloc after finalization\n");
                m = pmap_pti_alloc_page();
                if (*pml4e != 0) {
                        pmap_pti_free_page(m, true);
                        mphys = *pml4e & ~PAGE_MASK;
                } else {
                        mphys = VM_PAGE_TO_PHYS(m);
                        *pml4e = mphys | X86_PG_RW | X86_PG_V;
                }
        } else {
                mphys = *pml4e & ~PAGE_MASK;
        }
        pdpe = (pdp_entry_t *)PHYS_TO_DMAP(mphys) + pmap_pdpe_index(va);
        return (pdpe);
}

static void
pmap_pti_wire_pte(void *pte)
{
        vm_page_t m;

        VM_OBJECT_ASSERT_WLOCKED(pti_obj);
        m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pte));
        m->wire_count++;
}

static void
pmap_pti_unwire_pde(void *pde, bool only_ref)
{
        vm_page_t m;

        VM_OBJECT_ASSERT_WLOCKED(pti_obj);
        m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pde));
        MPASS(m->wire_count > 0);
        MPASS(only_ref || m->wire_count > 1);
        pmap_pti_free_page(m, false);
}

static void
pmap_pti_unwire_pte(void *pte, vm_offset_t va)
{
        vm_page_t m;
        pd_entry_t *pde;

        VM_OBJECT_ASSERT_WLOCKED(pti_obj);
        m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pte));
        MPASS(m->wire_count > 0);
        if (pmap_pti_free_page(m, false)) {
                pde = pmap_pti_pde(va);
                MPASS((*pde & (X86_PG_PS | X86_PG_V)) == X86_PG_V);
                *pde = 0;
                pmap_pti_unwire_pde(pde, false);
        }
}

static pd_entry_t *
pmap_pti_pde(vm_offset_t va)
{
        pdp_entry_t *pdpe;
        pd_entry_t *pde;
        vm_page_t m;
        vm_pindex_t pd_idx;
        vm_paddr_t mphys;

        VM_OBJECT_ASSERT_WLOCKED(pti_obj);

        pdpe = pmap_pti_pdpe(va);
        if (*pdpe == 0) {
                m = pmap_pti_alloc_page();
                if (*pdpe != 0) {
                        pmap_pti_free_page(m, true);
                        MPASS((*pdpe & X86_PG_PS) == 0);
                        mphys = *pdpe & ~PAGE_MASK;
                } else {
                        mphys = VM_PAGE_TO_PHYS(m);
                        *pdpe = mphys | X86_PG_RW | X86_PG_V;
                }
        } else {
                MPASS((*pdpe & X86_PG_PS) == 0);
                mphys = *pdpe & ~PAGE_MASK;
        }

        pde = (pd_entry_t *)PHYS_TO_DMAP(mphys);
        pd_idx = pmap_pde_index(va);
        pde += pd_idx;
        return (pde);
}

static pt_entry_t *
pmap_pti_pte(vm_offset_t va, bool *unwire_pde)
{
        pd_entry_t *pde;
        pt_entry_t *pte;
        vm_page_t m;
        vm_paddr_t mphys;

        VM_OBJECT_ASSERT_WLOCKED(pti_obj);

        pde = pmap_pti_pde(va);
        if (unwire_pde != NULL) {
                *unwire_pde = true;
                pmap_pti_wire_pte(pde);
        }
        if (*pde == 0) {
                m = pmap_pti_alloc_page();
                if (*pde != 0) {
                        pmap_pti_free_page(m, true);
                        MPASS((*pde & X86_PG_PS) == 0);
                        mphys = *pde & ~(PAGE_MASK | pg_nx);
                } else {
                        mphys = VM_PAGE_TO_PHYS(m);
                        *pde = mphys | X86_PG_RW | X86_PG_V;
                        if (unwire_pde != NULL)
                                *unwire_pde = false;
                }
        } else {
                MPASS((*pde & X86_PG_PS) == 0);
                mphys = *pde & ~(PAGE_MASK | pg_nx);
        }

        pte = (pt_entry_t *)PHYS_TO_DMAP(mphys);
        pte += pmap_pte_index(va);

        return (pte);
}

static void
pmap_pti_add_kva_locked(vm_offset_t sva, vm_offset_t eva, bool exec)
{
        vm_paddr_t pa;
        pd_entry_t *pde;
        pt_entry_t *pte, ptev;
        bool unwire_pde;

        VM_OBJECT_ASSERT_WLOCKED(pti_obj);

        sva = trunc_page(sva);
        MPASS(sva > VM_MAXUSER_ADDRESS);
        eva = round_page(eva);
        MPASS(sva < eva);
        for (; sva < eva; sva += PAGE_SIZE) {
                pte = pmap_pti_pte(sva, &unwire_pde);
                pa = pmap_kextract(sva);
                ptev = pa | X86_PG_RW | X86_PG_V | X86_PG_A |
                    (exec ? 0 : pg_nx) | pmap_cache_bits(kernel_pmap,
                    VM_MEMATTR_DEFAULT, FALSE);
                if (*pte == 0) {
                        pte_store(pte, ptev);
                        pmap_pti_wire_pte(pte);
                } else {
                        KASSERT(!pti_finalized,
                            ("pti overlap after fin %#lx %#lx %#lx",
                            sva, *pte, ptev));
                        KASSERT(*pte == ptev,
                            ("pti non-identical pte after fin %#lx %#lx %#lx",
                            sva, *pte, ptev));
                }
                if (unwire_pde) {
                        pde = pmap_pti_pde(sva);
                        pmap_pti_unwire_pde(pde, true);
                }
        }
}

void
pmap_pti_add_kva(vm_offset_t sva, vm_offset_t eva, bool exec)
{

        if (!pti)
                return;
        VM_OBJECT_WLOCK(pti_obj);
        pmap_pti_add_kva_locked(sva, eva, exec);
        VM_OBJECT_WUNLOCK(pti_obj);
}

void
pmap_pti_remove_kva(vm_offset_t sva, vm_offset_t eva)
{
        pt_entry_t *pte;
        vm_offset_t va;

        if (!pti)
                return;
        sva = rounddown2(sva, PAGE_SIZE);
        MPASS(sva > VM_MAXUSER_ADDRESS);
        eva = roundup2(eva, PAGE_SIZE);
        MPASS(sva < eva);
        VM_OBJECT_WLOCK(pti_obj);
        for (va = sva; va < eva; va += PAGE_SIZE) {
                pte = pmap_pti_pte(va, NULL);
                KASSERT((*pte & X86_PG_V) != 0,
                    ("invalid pte va %#lx pte %#lx pt %#lx", va,
                    (u_long)pte, *pte));
                pte_clear(pte);
                pmap_pti_unwire_pte(pte, va);
        }
        pmap_invalidate_range(kernel_pmap, sva, eva);
        VM_OBJECT_WUNLOCK(pti_obj);
}
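Annotation (illustrative, not commit text): pmap_pti_add_kva() and pmap_pti_remove_kva() above are the interface the rest of the kernel uses to expose an additional kernel object on the PTI user page table. A minimal hypothetical caller, mirroring the TSS and LDT changes later in this diff (the foo_* names are placeholders only):

/* Hypothetical example; follows the amd64_set_ioperm()/user_ldt_alloc() pattern. */
struct foo_pcpu { uint64_t fp_scratch[8]; };    /* placeholder payload */
static struct foo_pcpu foo_tab[MAXCPU];

static void
foo_pti_register(void)
{

        pmap_pti_add_kva((vm_offset_t)foo_tab,
            (vm_offset_t)foo_tab + sizeof(foo_tab), false);
}

static void
foo_pti_unregister(void)
{

        pmap_pti_remove_kva((vm_offset_t)foo_tab,
            (vm_offset_t)foo_tab + sizeof(foo_tab));
}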

#include "opt_ddb.h"
#ifdef DDB
#include <sys/kdb.h>
@@ -362,7 +362,9 @@ amd64_set_ioperm(td, uap)
        pcb = td->td_pcb;
        if (pcb->pcb_tssp == NULL) {
                tssp = (struct amd64tss *)kmem_malloc(kernel_arena,
                    ctob(IOPAGES+1), M_WAITOK);
                    ctob(IOPAGES + 1), M_WAITOK);
                pmap_pti_add_kva((vm_offset_t)tssp, (vm_offset_t)tssp +
                    ctob(IOPAGES + 1), false);
                iomap = (char *)&tssp[1];
                memset(iomap, 0xff, IOPERM_BITMAP_SIZE);
                critical_enter();
@@ -451,6 +453,8 @@ user_ldt_alloc(struct proc *p, int force)
        struct proc_ldt *pldt, *new_ldt;
        struct mdproc *mdp;
        struct soft_segment_descriptor sldt;
        vm_offset_t sva;
        vm_size_t sz;

        mtx_assert(&dt_lock, MA_OWNED);
        mdp = &p->p_md;
@@ -458,13 +462,13 @@ user_ldt_alloc(struct proc *p, int force)
                return (mdp->md_ldt);
        mtx_unlock(&dt_lock);
        new_ldt = malloc(sizeof(struct proc_ldt), M_SUBPROC, M_WAITOK);
        new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena,
            max_ldt_segment * sizeof(struct user_segment_descriptor),
            M_WAITOK | M_ZERO);
        sz = max_ldt_segment * sizeof(struct user_segment_descriptor);
        sva = kmem_malloc(kernel_arena, sz, M_WAITOK | M_ZERO);
        new_ldt->ldt_base = (caddr_t)sva;
        pmap_pti_add_kva(sva, sva + sz, false);
        new_ldt->ldt_refcnt = 1;
        sldt.ssd_base = (uint64_t)new_ldt->ldt_base;
        sldt.ssd_limit = max_ldt_segment *
            sizeof(struct user_segment_descriptor) - 1;
        sldt.ssd_base = sva;
        sldt.ssd_limit = sz - 1;
        sldt.ssd_type = SDT_SYSLDT;
        sldt.ssd_dpl = SEL_KPL;
        sldt.ssd_p = 1;
@@ -474,8 +478,8 @@ user_ldt_alloc(struct proc *p, int force)
        mtx_lock(&dt_lock);
        pldt = mdp->md_ldt;
        if (pldt != NULL && !force) {
                kmem_free(kernel_arena, (vm_offset_t)new_ldt->ldt_base,
                    max_ldt_segment * sizeof(struct user_segment_descriptor));
                pmap_pti_remove_kva(sva, sva + sz);
                kmem_free(kernel_arena, sva, sz);
                free(new_ldt, M_SUBPROC);
                return (pldt);
        }
@@ -522,10 +526,14 @@ user_ldt_free(struct thread *td)
static void
user_ldt_derefl(struct proc_ldt *pldt)
{
        vm_offset_t sva;
        vm_size_t sz;

        if (--pldt->ldt_refcnt == 0) {
                kmem_free(kernel_arena, (vm_offset_t)pldt->ldt_base,
                    max_ldt_segment * sizeof(struct user_segment_descriptor));
                sva = (vm_offset_t)pldt->ldt_base;
                sz = max_ldt_segment * sizeof(struct user_segment_descriptor);
                pmap_pti_remove_kva(sva, sva + sz);
                kmem_free(kernel_arena, sva, sz);
                free(pldt, M_SUBPROC);
        }
}
@@ -448,9 +448,28 @@ trap(struct trapframe *frame)
                 * problem here and not have to check all the
                 * selectors and pointers when the user changes
                 * them.
                 *
                 * In case of PTI, the IRETQ faulted while the
                 * kernel used the pti stack, and exception
                 * frame records %rsp value pointing to that
                 * stack. If we return normally to
                 * doreti_iret_fault, the trapframe is
                 * reconstructed on pti stack, and calltrap()
                 * called on it as well. Due to the very
                 * limited pti stack size, kernel does not
                 * survive for too long. Switch to the normal
                 * thread stack for the trap handling.
                 *
                 * Magic '5' is the number of qwords occupied by
                 * the hardware trap frame.
                 */
                if (frame->tf_rip == (long)doreti_iret) {
                        frame->tf_rip = (long)doreti_iret_fault;
                        if (pti && frame->tf_rsp == (uintptr_t)PCPU_PTR(
                            pti_stack) + (PC_PTI_STACK_SZ - 5) *
                            sizeof(register_t))
                                frame->tf_rsp = PCPU_GET(rsp0) - 5 *
                                    sizeof(register_t);
                        return;
                }
                if (frame->tf_rip == (long)ld_ds) {
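Annotation on the check above (my arithmetic, not commit text): with PC_PTI_STACK_SZ == 16, an iretq that faults while running on an otherwise empty trampoline stack leaves the 5-qword hardware frame (%rip, %cs, %rflags, %rsp, %ss) at its top, so the recorded %rsp equals &pc_pti_stack[16 - 5]; the handler then re-points it the same 5 qwords below pc_rsp0 so the fault frame appears to live on the thread's kernel stack.

        /* Illustrative restatement of the two addresses being compared. */
        uintptr_t pti_sp, kstack_sp;

        pti_sp = (uintptr_t)PCPU_PTR(pti_stack) +
            (PC_PTI_STACK_SZ - 5) * sizeof(register_t);     /* &pc_pti_stack[11] */
        kstack_sp = PCPU_GET(rsp0) - 5 * sizeof(register_t);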
@@ -339,6 +339,8 @@ cpu_thread_clean(struct thread *td)
         * Clean TSS/iomap
         */
        if (pcb->pcb_tssp != NULL) {
                pmap_pti_remove_kva((vm_offset_t)pcb->pcb_tssp,
                    (vm_offset_t)pcb->pcb_tssp + ctob(IOPAGES + 1));
                kmem_free(kernel_arena, (vm_offset_t)pcb->pcb_tssp,
                    ctob(IOPAGES + 1));
                pcb->pcb_tssp = NULL;
@@ -40,17 +40,19 @@
 * that it originated in supervisor mode and skip the swapgs.
 */
        SUPERALIGN_TEXT
IDTVEC(int0x80_syscall_pti)
        PTI_UENTRY has_err=0
        jmp int0x80_syscall_common
        SUPERALIGN_TEXT
IDTVEC(int0x80_syscall)
        swapgs
int0x80_syscall_common:
        pushq $2 /* sizeof "int 0x80" */
        subq $TF_ERR,%rsp /* skip over tf_trapno */
        movq %rdi,TF_RDI(%rsp)
        movq PCPU(CURPCB),%rdi
        andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
        movw %fs,TF_FS(%rsp)
        movw %gs,TF_GS(%rsp)
        movw %es,TF_ES(%rsp)
        movw %ds,TF_DS(%rsp)
        SAVE_SEGS
        sti
        movq %rsi,TF_RSI(%rsp)
        movq %rdx,TF_RDX(%rsp)
@@ -95,7 +95,8 @@ __FBSDID("$FreeBSD$");

#define IDTVEC(name) __CONCAT(X,name)

extern inthand_t IDTVEC(int0x80_syscall), IDTVEC(rsvd);
extern inthand_t IDTVEC(int0x80_syscall), IDTVEC(int0x80_syscall_pti),
    IDTVEC(rsvd), IDTVEC(rsvd_pti);

void ia32_syscall(struct trapframe *frame); /* Called from asm code */

@@ -208,14 +209,16 @@ static void
ia32_syscall_enable(void *dummy)
{

        setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYSIGT, SEL_UPL, 0);
        setidt(IDT_SYSCALL, pti ? &IDTVEC(int0x80_syscall_pti) :
            &IDTVEC(int0x80_syscall), SDT_SYSIGT, SEL_UPL, 0);
}

static void
ia32_syscall_disable(void *dummy)
{

        setidt(IDT_SYSCALL, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
        setidt(IDT_SYSCALL, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd),
            SDT_SYSIGT, SEL_KPL, 0);
}

SYSINIT(ia32_syscall, SI_SUB_EXEC, SI_ORDER_ANY, ia32_syscall_enable, NULL);
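Annotation (illustrative, not commit text): the "pti ? ..._pti : ..." selection seen in these two functions is the idiom repeated for every IDT vector touched later in this diff (APIC, ATPIC, vmbus, vmm). Shaped as a hypothetical helper, it would be:

static inline inthand_t *
pti_pick(inthand_t *pti_handler, inthand_t *plain_handler)
{

        return (pti ? pti_handler : plain_handler);
}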
@@ -1,9 +1,17 @@
/* -*- mode: asm -*- */
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Copyright (c) 2018 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by
 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
@@ -145,69 +153,6 @@
        popq %rbp

#ifdef LOCORE
/*
 * Convenience macro for declaring interrupt entry points.
 */
#define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(X,name); \
        .type __CONCAT(X,name),@function; __CONCAT(X,name):

/*
 * Macros to create and destroy a trap frame.
 */
#define PUSH_FRAME \
        subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \
        testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? */ \
        jz 1f ; /* Yes, dont swapgs again */ \
        swapgs ; \
1:      movq %rdi,TF_RDI(%rsp) ; \
        movq %rsi,TF_RSI(%rsp) ; \
        movq %rdx,TF_RDX(%rsp) ; \
        movq %rcx,TF_RCX(%rsp) ; \
        movq %r8,TF_R8(%rsp) ; \
        movq %r9,TF_R9(%rsp) ; \
        movq %rax,TF_RAX(%rsp) ; \
        movq %rbx,TF_RBX(%rsp) ; \
        movq %rbp,TF_RBP(%rsp) ; \
        movq %r10,TF_R10(%rsp) ; \
        movq %r11,TF_R11(%rsp) ; \
        movq %r12,TF_R12(%rsp) ; \
        movq %r13,TF_R13(%rsp) ; \
        movq %r14,TF_R14(%rsp) ; \
        movq %r15,TF_R15(%rsp) ; \
        movw %fs,TF_FS(%rsp) ; \
        movw %gs,TF_GS(%rsp) ; \
        movw %es,TF_ES(%rsp) ; \
        movw %ds,TF_DS(%rsp) ; \
        movl $TF_HASSEGS,TF_FLAGS(%rsp) ; \
        cld ; \
        testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel ? */ \
        jz 2f ; /* yes, leave PCB_FULL_IRET alone */ \
        movq PCPU(CURPCB),%r8 ; \
        andl $~PCB_FULL_IRET,PCB_FLAGS(%r8) ; \
2:

#define POP_FRAME \
        movq TF_RDI(%rsp),%rdi ; \
        movq TF_RSI(%rsp),%rsi ; \
        movq TF_RDX(%rsp),%rdx ; \
        movq TF_RCX(%rsp),%rcx ; \
        movq TF_R8(%rsp),%r8 ; \
        movq TF_R9(%rsp),%r9 ; \
        movq TF_RAX(%rsp),%rax ; \
        movq TF_RBX(%rsp),%rbx ; \
        movq TF_RBP(%rsp),%rbp ; \
        movq TF_R10(%rsp),%r10 ; \
        movq TF_R11(%rsp),%r11 ; \
        movq TF_R12(%rsp),%r12 ; \
        movq TF_R13(%rsp),%r13 ; \
        movq TF_R14(%rsp),%r14 ; \
        movq TF_R15(%rsp),%r15 ; \
        testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? */ \
        jz 1f ; /* keep kernel GS.base */ \
        cli ; \
        swapgs ; \
1:      addq $TF_RIP,%rsp /* skip over tf_err, tf_trapno */

/*
 * Access per-CPU data.
 */
@@ -216,6 +161,125 @@
        movq %gs:PC_PRVSPACE, reg ; \
        addq $PC_ ## member, reg

/*
 * Convenience macro for declaring interrupt entry points.
 */
#define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(X,name); \
        .type __CONCAT(X,name),@function; __CONCAT(X,name):

        .macro SAVE_SEGS
        movw %fs,TF_FS(%rsp)
        movw %gs,TF_GS(%rsp)
        movw %es,TF_ES(%rsp)
        movw %ds,TF_DS(%rsp)
        .endm

        .macro MOVE_STACKS qw
        offset=0
        .rept \qw
        movq offset(%rsp),%rdx
        movq %rdx,offset(%rax)
        offset=offset+8
        .endr
        .endm

        .macro PTI_UENTRY has_err
        swapgs
        pushq %rax
        pushq %rdx
        movq PCPU(KCR3),%rax
        movq %rax,%cr3
        movq PCPU(RSP0),%rax
        subq $PTI_SIZE,%rax
        MOVE_STACKS (PTI_SIZE / 8) - 1 + \has_err
        movq %rax,%rsp
        popq %rdx
        popq %rax
        .endm
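Annotation (illustrative sketch, not commit text): PTI_UENTRY is the user-entry half of the trampoline. A rough C model of what it does, assuming the pcpu fields and struct pti_frame added elsewhere in this diff:

/* Rough model only; the real work is done by the assembler macro above. */
static void
pti_uentry_model(struct pti_frame *tramp, int has_err)
{
        uint64_t *src, *dst;
        int i, n;

        load_cr3(PCPU_GET(kcr3));               /* leave the user page table */
        dst = (uint64_t *)(PCPU_GET(rsp0) - sizeof(struct pti_frame));
        src = (uint64_t *)tramp;
        n = sizeof(struct pti_frame) / 8 - 1 + has_err; /* MOVE_STACKS count */
        for (i = 0; i < n; i++)
                dst[i] = src[i];
        /* The macro then switches %rsp to dst and pops the scratch %rdx/%rax. */
}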

        .macro PTI_ENTRY name, cont, has_err=0
        ALIGN_TEXT
        .globl X\name\()_pti
        .type X\name\()_pti,@function
X\name\()_pti:
        /* %rax, %rdx and possibly err not yet pushed */
        testb $SEL_RPL_MASK,PTI_CS-(2+1-\has_err)*8(%rsp)
        jz \cont
        PTI_UENTRY \has_err
        swapgs
        jmp \cont
        .endm

        .macro PTI_INTRENTRY vec_name
        SUPERALIGN_TEXT
        .globl X\vec_name\()_pti
        .type X\vec_name\()_pti,@function
X\vec_name\()_pti:
        testb $SEL_RPL_MASK,PTI_CS-3*8(%rsp) /* err, %rax, %rdx not pushed */
        jz \vec_name\()_u
        PTI_UENTRY has_err=0
        jmp \vec_name\()_u
        .endm

        .macro INTR_PUSH_FRAME vec_name
        SUPERALIGN_TEXT
        .globl X\vec_name
        .type X\vec_name,@function
X\vec_name:
        testb $SEL_RPL_MASK,PTI_CS-3*8(%rsp) /* come from kernel? */
        jz \vec_name\()_u /* Yes, dont swapgs again */
        swapgs
\vec_name\()_u:
        subq $TF_RIP,%rsp /* skip dummy tf_err and tf_trapno */
        movq %rdi,TF_RDI(%rsp)
        movq %rsi,TF_RSI(%rsp)
        movq %rdx,TF_RDX(%rsp)
        movq %rcx,TF_RCX(%rsp)
        movq %r8,TF_R8(%rsp)
        movq %r9,TF_R9(%rsp)
        movq %rax,TF_RAX(%rsp)
        movq %rbx,TF_RBX(%rsp)
        movq %rbp,TF_RBP(%rsp)
        movq %r10,TF_R10(%rsp)
        movq %r11,TF_R11(%rsp)
        movq %r12,TF_R12(%rsp)
        movq %r13,TF_R13(%rsp)
        movq %r14,TF_R14(%rsp)
        movq %r15,TF_R15(%rsp)
        SAVE_SEGS
        movl $TF_HASSEGS,TF_FLAGS(%rsp)
        cld
        testb $SEL_RPL_MASK,TF_CS(%rsp) /* come from kernel ? */
        jz 1f /* yes, leave PCB_FULL_IRET alone */
        movq PCPU(CURPCB),%r8
        andl $~PCB_FULL_IRET,PCB_FLAGS(%r8)
1:
        .endm

        .macro INTR_HANDLER vec_name
        .text
        PTI_INTRENTRY \vec_name
        INTR_PUSH_FRAME \vec_name
        .endm
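Annotation (my reading, not commit text): each INTR_HANDLER use emits a paired PTI and non-PTI entry point that share one frame-building body; the Hyper-V vmbus and APIC vector hunks later in this diff are converted to exactly this pattern. Sketch of the expansion for a hypothetical vector name:

/*
 * INTR_HANDLER foo_intr expands roughly to:
 *      Xfoo_intr_pti:  if the saved %cs is from user mode, run PTI_UENTRY,
 *                      then jump to foo_intr_u
 *      Xfoo_intr:      if the saved %cs is from user mode, swapgs
 *      foo_intr_u:     build the trap frame (INTR_PUSH_FRAME body)
 */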

        .macro RESTORE_REGS
        movq TF_RDI(%rsp),%rdi
        movq TF_RSI(%rsp),%rsi
        movq TF_RDX(%rsp),%rdx
        movq TF_RCX(%rsp),%rcx
        movq TF_R8(%rsp),%r8
        movq TF_R9(%rsp),%r9
        movq TF_RAX(%rsp),%rax
        movq TF_RBX(%rsp),%rbx
        movq TF_RBP(%rsp),%rbp
        movq TF_R10(%rsp),%r10
        movq TF_R11(%rsp),%r11
        movq TF_R12(%rsp),%r12
        movq TF_R13(%rsp),%r13
        movq TF_R14(%rsp),%r14
        movq TF_R15(%rsp),%r15
        .endm

#endif /* LOCORE */

#ifdef __STDC__
@@ -1,6 +1,50 @@
/*-
 * This file is in the public domain.
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2018 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/* $FreeBSD$ */

#ifndef _AMD64_FRAME_H
#define _AMD64_FRAME_H

#include <x86/frame.h>

struct pti_frame {
        register_t pti_rdx;
        register_t pti_rax;
        register_t pti_err;
        register_t pti_rip;
        register_t pti_cs;
        register_t pti_rflags;
        register_t pti_rsp;
        register_t pti_ss;
};

#endif
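Annotation (assumption, not commit text): the PTI_CS/PTI_SIZE offsets used by the asmacros.h macros are taken to be generated from this structure through genassym/assym.s, which is what lets tests such as PTI_CS-3*8(%rsp) read the saved %cs while only the five hardware iret qwords are on the trampoline stack. A compile-time restatement of that layout assumption:

#include <stddef.h>

_Static_assert(offsetof(struct pti_frame, pti_cs) == 4 * sizeof(register_t),
    "pti_cs is the fifth qword of the PTI trampoline frame");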
@@ -35,6 +35,7 @@
#error "sys/cdefs.h is a prerequisite for this file"
#endif

#define PC_PTI_STACK_SZ 16
/*
 * The SMP parts are setup in pmap.c and locore.s for the BSP, and
 * mp_machdep.c sets up the data for the AP's to "see" when they awake.
@@ -48,8 +49,11 @@
        struct pmap *pc_curpmap; \
        struct amd64tss *pc_tssp; /* TSS segment active on CPU */ \
        struct amd64tss *pc_commontssp;/* Common TSS for the CPU */ \
        uint64_t pc_kcr3; \
        uint64_t pc_ucr3; \
        register_t pc_rsp0; \
        register_t pc_scratch_rsp; /* User %rsp in syscall */ \
        register_t pc_scratch_rax; \
        u_int pc_apic_id; \
        u_int pc_acpi_id; /* ACPI CPU id */ \
        /* Pointer to the CPU %fs descriptor */ \
@@ -63,12 +67,13 @@
        uint64_t pc_pm_save_cnt; \
        u_int pc_cmci_mask; /* MCx banks for CMCI */ \
        uint64_t pc_dbreg[16]; /* ddb debugging regs */ \
        uint64_t pc_pti_stack[PC_PTI_STACK_SZ]; \
        int pc_dbreg_cmd; /* ddb debugging reg cmd */ \
        u_int pc_vcpu_id; /* Xen vCPU ID */ \
        uint32_t pc_pcid_next; \
        uint32_t pc_pcid_gen; \
        uint32_t pc_smp_tlb_done; /* TLB op acknowledgement */ \
        char __pad[384] /* be divisor of PAGE_SIZE \
        char __pad[232] /* be divisor of PAGE_SIZE \
                           after cache alignment */

#define PC_DBREG_CMD_NONE 0
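Annotation (my arithmetic, not commit text): the __pad shrink from 384 to 232 bytes matches the new members exactly; pc_kcr3, pc_ucr3 and pc_rsp0 add 3 * 8 bytes and pc_pti_stack adds PC_PTI_STACK_SZ * 8 = 128 bytes, 152 bytes in total:

_Static_assert(3 * sizeof(uint64_t) +
    PC_PTI_STACK_SZ * sizeof(uint64_t) == 384 - 232,
    "new pcpu members account for the __pad adjustment");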
@@ -315,7 +315,9 @@ struct pmap_pcids {
struct pmap {
        struct mtx pm_mtx;
        pml4_entry_t *pm_pml4; /* KVA of level 4 page table */
        pml4_entry_t *pm_pml4u; /* KVA of user l4 page table */
        uint64_t pm_cr3;
        uint64_t pm_ucr3;
        TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */
        cpuset_t pm_active; /* active on cpus */
        enum pmap_type pm_type; /* regular or nested tables */
@@ -429,6 +431,8 @@ void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva,
void pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num);
boolean_t pmap_map_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
void pmap_pti_add_kva(vm_offset_t sva, vm_offset_t eva, bool exec);
void pmap_pti_remove_kva(vm_offset_t sva, vm_offset_t eva);
#endif /* _KERNEL */

/* Return various clipped indexes for a given VA */
@@ -30,7 +30,18 @@ extern u_int32_t mptramp_pagetables;
inthand_t
        IDTVEC(invltlb_pcid), /* TLB shootdowns - global, pcid */
        IDTVEC(invltlb_invpcid),/* TLB shootdowns - global, invpcid */
        IDTVEC(justreturn); /* interrupt CPU with minimum overhead */
        IDTVEC(justreturn), /* interrupt CPU with minimum overhead */
        IDTVEC(invltlb_pcid_pti),
        IDTVEC(invltlb_invpcid_pti),
        IDTVEC(justreturn1_pti),
        IDTVEC(invltlb_pti),
        IDTVEC(invlpg_pti),
        IDTVEC(invlrng_pti),
        IDTVEC(invlcache_pti),
        IDTVEC(ipi_intr_bitmap_handler_pti),
        IDTVEC(cpustop_pti),
        IDTVEC(cpususpend_pti),
        IDTVEC(rendezvous_pti);

void invltlb_pcid_handler(void);
void invltlb_invpcid_handler(void);
@@ -695,7 +695,8 @@ vmx_init(int ipinum)
                    MSR_VMX_TRUE_PINBASED_CTLS, PINBASED_POSTED_INTERRUPT, 0,
                    &tmp);
                if (error == 0) {
                        pirvec = lapic_ipi_alloc(&IDTVEC(justreturn));
                        pirvec = lapic_ipi_alloc(pti ? &IDTVEC(justreturn1_pti) :
                            &IDTVEC(justreturn));
                        if (pirvec < 0) {
                                if (bootverbose) {
                                        printf("vmx_init: unable to allocate "
@@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$");
#include <machine/cpu.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/md_var.h>
#include <x86/psl.h>
#include <x86/apicreg.h>

@@ -327,7 +328,8 @@ vmm_init(void)

        vmm_host_state_init();

        vmm_ipinum = lapic_ipi_alloc(&IDTVEC(justreturn));
        vmm_ipinum = lapic_ipi_alloc(pti ? &IDTVEC(justreturn1_pti) :
            &IDTVEC(justreturn));
        if (vmm_ipinum < 0)
                vmm_ipinum = IPI_AST;
@@ -26,19 +26,18 @@
 * $FreeBSD$
 */

#include "assym.s"

#include <machine/asmacros.h>
#include <machine/specialreg.h>

#include "assym.s"

/*
 * This is the Hyper-V vmbus channel direct callback interrupt.
 * Only used when it is running on Hyper-V.
 */
        .text
        SUPERALIGN_TEXT
IDTVEC(vmbus_isr)
        PUSH_FRAME
        INTR_HANDLER vmbus_isr
        FAKE_MCOUNT(TF_RIP(%rsp))
        movq %rsp, %rdi
        call vmbus_handle_intr
@@ -37,6 +37,7 @@
 */
        .text
        SUPERALIGN_TEXT
IDTVEC(vmbus_isr_pti)
IDTVEC(vmbus_isr)
        PUSH_FRAME
        SET_KERNEL_SREGS
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");

#include <machine/bus.h>
#include <machine/intr_machdep.h>
#include <machine/md_var.h>
#include <machine/resource.h>
#include <x86/include/apicvar.h>

@@ -135,7 +136,7 @@ static int vmbus_pin_evttask = 1;
SYSCTL_INT(_hw_vmbus, OID_AUTO, pin_evttask, CTLFLAG_RDTUN,
    &vmbus_pin_evttask, 0, "Pin event tasks to their respective CPU");

extern inthand_t IDTVEC(vmbus_isr);
extern inthand_t IDTVEC(vmbus_isr), IDTVEC(vmbus_isr_pti);

static const uint32_t vmbus_version[] = {
        VMBUS_VERSION_WIN8_1,
@@ -941,7 +942,8 @@ vmbus_intr_setup(struct vmbus_softc *sc)
         * All Hyper-V ISR required resources are setup, now let's find a
         * free IDT vector for Hyper-V ISR and set it up.
         */
        sc->vmbus_idtvec = lapic_ipi_alloc(IDTVEC(vmbus_isr));
        sc->vmbus_idtvec = lapic_ipi_alloc(pti ? IDTVEC(vmbus_isr_pti) :
            IDTVEC(vmbus_isr));
        if (sc->vmbus_idtvec < 0) {
                device_printf(sc->vmbus_dev, "cannot find free IDT vector\n");
                return ENXIO;
@@ -70,6 +70,7 @@ as_lapic_eoi:
#define ISR_VEC(index, vec_name) \
        .text ; \
        SUPERALIGN_TEXT ; \
IDTVEC(vec_name ## _pti) ; \
IDTVEC(vec_name) ; \
        PUSH_FRAME ; \
        SET_KERNEL_SREGS ; \
@@ -123,6 +124,7 @@ IDTVEC(spuriousint)
 */
        .text
        SUPERALIGN_TEXT
IDTVEC(timerint_pti)
IDTVEC(timerint)
        PUSH_FRAME
        SET_KERNEL_SREGS
@@ -139,6 +141,7 @@ IDTVEC(timerint)
 */
        .text
        SUPERALIGN_TEXT
IDTVEC(cmcint_pti)
IDTVEC(cmcint)
        PUSH_FRAME
        SET_KERNEL_SREGS
@@ -153,6 +156,7 @@ IDTVEC(cmcint)
 */
        .text
        SUPERALIGN_TEXT
IDTVEC(errorint_pti)
IDTVEC(errorint)
        PUSH_FRAME
        SET_KERNEL_SREGS
@@ -46,6 +46,7 @@
#define INTR(irq_num, vec_name) \
        .text ; \
        SUPERALIGN_TEXT ; \
IDTVEC(vec_name ##_pti) ; \
IDTVEC(vec_name) ; \
        PUSH_FRAME ; \
        SET_KERNEL_SREGS ; \
@@ -133,6 +133,7 @@ IDTVEC(page)
        TRAP(T_PAGEFLT)
IDTVEC(mchk)
        pushl $0; TRAP(T_MCHK)
IDTVEC(rsvd_pti)
IDTVEC(rsvd)
        pushl $0; TRAP(T_RESERVED)
IDTVEC(fpu)
@@ -280,6 +280,8 @@ SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
    "Number of times pmap_pte_quick didn't change PMAP1");
static struct mtx PMAP2mutex;

int pti;

static void free_pv_chunk(struct pv_chunk *pc);
static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
@@ -185,7 +185,11 @@ inthand_t
        IDTVEC(apic_isr1), IDTVEC(apic_isr2), IDTVEC(apic_isr3),
        IDTVEC(apic_isr4), IDTVEC(apic_isr5), IDTVEC(apic_isr6),
        IDTVEC(apic_isr7), IDTVEC(cmcint), IDTVEC(errorint),
        IDTVEC(spuriousint), IDTVEC(timerint);
        IDTVEC(spuriousint), IDTVEC(timerint),
        IDTVEC(apic_isr1_pti), IDTVEC(apic_isr2_pti), IDTVEC(apic_isr3_pti),
        IDTVEC(apic_isr4_pti), IDTVEC(apic_isr5_pti), IDTVEC(apic_isr6_pti),
        IDTVEC(apic_isr7_pti), IDTVEC(cmcint_pti), IDTVEC(errorint_pti),
        IDTVEC(spuriousint_pti), IDTVEC(timerint_pti);

extern vm_paddr_t lapic_paddr;
extern int *apic_cpuids;
@@ -83,6 +83,7 @@ extern int _ugssel;
extern int use_xsave;
extern uint64_t xsave_mask;
extern u_int max_apic_id;
extern int pti;

struct pcb;
struct thread;
@@ -80,6 +80,16 @@ inthand_t
        IDTVEC(atpic_intr9), IDTVEC(atpic_intr10), IDTVEC(atpic_intr11),
        IDTVEC(atpic_intr12), IDTVEC(atpic_intr13), IDTVEC(atpic_intr14),
        IDTVEC(atpic_intr15);
/* XXXKIB i386 uses stubs until pti comes */
inthand_t
        IDTVEC(atpic_intr0_pti), IDTVEC(atpic_intr1_pti),
        IDTVEC(atpic_intr2_pti), IDTVEC(atpic_intr3_pti),
        IDTVEC(atpic_intr4_pti), IDTVEC(atpic_intr5_pti),
        IDTVEC(atpic_intr6_pti), IDTVEC(atpic_intr7_pti),
        IDTVEC(atpic_intr8_pti), IDTVEC(atpic_intr9_pti),
        IDTVEC(atpic_intr10_pti), IDTVEC(atpic_intr11_pti),
        IDTVEC(atpic_intr12_pti), IDTVEC(atpic_intr13_pti),
        IDTVEC(atpic_intr14_pti), IDTVEC(atpic_intr15_pti);

#define IRQ(ap, ai) ((ap)->at_irqbase + (ai)->at_irq)

@@ -92,7 +102,7 @@ inthand_t

#define INTSRC(irq) \
        { { &atpics[(irq) / 8].at_pic }, IDTVEC(atpic_intr ## irq ), \
            (irq) % 8 }
            IDTVEC(atpic_intr ## irq ## _pti), (irq) % 8 }

struct atpic {
        struct pic at_pic;
@@ -104,7 +114,7 @@ struct atpic {

struct atpic_intsrc {
        struct intsrc at_intsrc;
        inthand_t *at_intr;
        inthand_t *at_intr, *at_intr_pti;
        int at_irq; /* Relative to PIC base. */
        enum intr_trigger at_trigger;
        u_long at_count;
@@ -408,7 +418,8 @@ atpic_startup(void)
                ai->at_intsrc.is_count = &ai->at_count;
                ai->at_intsrc.is_straycount = &ai->at_straycount;
                setidt(((struct atpic *)ai->at_intsrc.is_pic)->at_intbase +
                    ai->at_irq, ai->at_intr, SDT_ATPIC, SEL_KPL, GSEL_ATPIC);
                    ai->at_irq, pti ? ai->at_intr_pti : ai->at_intr, SDT_ATPIC,
                    SEL_KPL, GSEL_ATPIC);
        }

/*
@@ -171,13 +171,23 @@ static inthand_t *ioint_handlers[] = {
        IDTVEC(apic_isr7), /* 224 - 255 */
};

static inthand_t *ioint_pti_handlers[] = {
        NULL, /* 0 - 31 */
        IDTVEC(apic_isr1_pti), /* 32 - 63 */
        IDTVEC(apic_isr2_pti), /* 64 - 95 */
        IDTVEC(apic_isr3_pti), /* 96 - 127 */
        IDTVEC(apic_isr4_pti), /* 128 - 159 */
        IDTVEC(apic_isr5_pti), /* 160 - 191 */
        IDTVEC(apic_isr6_pti), /* 192 - 223 */
        IDTVEC(apic_isr7_pti), /* 224 - 255 */
};

static u_int32_t lapic_timer_divisors[] = {
        APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16,
        APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128
};

extern inthand_t IDTVEC(rsvd);
extern inthand_t IDTVEC(rsvd_pti), IDTVEC(rsvd);

volatile char *lapic_map;
vm_paddr_t lapic_paddr;
@@ -496,15 +506,18 @@ native_lapic_init(vm_paddr_t addr)
        PCPU_SET(apic_id, lapic_id());

        /* Local APIC timer interrupt. */
        setidt(APIC_TIMER_INT, IDTVEC(timerint), SDT_APIC, SEL_KPL, GSEL_APIC);
        setidt(APIC_TIMER_INT, pti ? IDTVEC(timerint_pti) : IDTVEC(timerint),
            SDT_APIC, SEL_KPL, GSEL_APIC);

        /* Local APIC error interrupt. */
        setidt(APIC_ERROR_INT, IDTVEC(errorint), SDT_APIC, SEL_KPL, GSEL_APIC);
        setidt(APIC_ERROR_INT, pti ? IDTVEC(errorint_pti) : IDTVEC(errorint),
            SDT_APIC, SEL_KPL, GSEL_APIC);

        /* XXX: Thermal interrupt */

        /* Local APIC CMCI. */
        setidt(APIC_CMC_INT, IDTVEC(cmcint), SDT_APICT, SEL_KPL, GSEL_APIC);
        setidt(APIC_CMC_INT, pti ? IDTVEC(cmcint_pti) : IDTVEC(cmcint),
            SDT_APICT, SEL_KPL, GSEL_APIC);

        if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) {
                arat = 0;
@@ -1569,8 +1582,8 @@ native_apic_enable_vector(u_int apic_id, u_int vector)
        KASSERT(vector != IDT_DTRACE_RET,
            ("Attempt to overwrite DTrace entry"));
#endif
        setidt(vector, ioint_handlers[vector / 32], SDT_APIC, SEL_KPL,
            GSEL_APIC);
        setidt(vector, (pti ? ioint_pti_handlers : ioint_handlers)[vector / 32],
            SDT_APIC, SEL_KPL, GSEL_APIC);
}

static void
@@ -1589,7 +1602,8 @@ native_apic_disable_vector(u_int apic_id, u_int vector)
         * We can not currently clear the idt entry because other cpus
         * may have a valid vector at this offset.
         */
        setidt(vector, &IDTVEC(rsvd), SDT_APICT, SEL_KPL, GSEL_APIC);
        setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APICT,
            SEL_KPL, GSEL_APIC);
#endif
}

@@ -2095,7 +2109,8 @@ native_lapic_ipi_alloc(inthand_t *ipifunc)
        long func;
        int idx, vector;

        KASSERT(ipifunc != &IDTVEC(rsvd), ("invalid ipifunc %p", ipifunc));
        KASSERT(ipifunc != &IDTVEC(rsvd) && ipifunc != &IDTVEC(rsvd_pti),
            ("invalid ipifunc %p", ipifunc));

        vector = -1;
        mtx_lock_spin(&icu_lock);
@@ -2124,8 +2139,10 @@ native_lapic_ipi_free(int vector)
        mtx_lock_spin(&icu_lock);
        ip = &idt[vector];
        func = (ip->gd_hioffset << 16) | ip->gd_looffset;
        KASSERT(func != (uintptr_t)&IDTVEC(rsvd),
        KASSERT(func != (uintptr_t)&IDTVEC(rsvd) &&
            func != (uintptr_t)&IDTVEC(rsvd_pti),
            ("invalid idtfunc %#lx", func));
        setidt(vector, &IDTVEC(rsvd), SDT_APICT, SEL_KPL, GSEL_APIC);
        setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APICT,
            SEL_KPL, GSEL_APIC);
        mtx_unlock_spin(&icu_lock);
}