PTI: Trap if we returned to userspace with kernel (full) page table

still active.

Map the userspace portion of the VA in the PTI kernel-mode page table as
non-executable. This way, if we ever miss reloading ucr3 into %cr3 on
the return to usermode, the process traps instead of executing in a
potentially vulnerable setup.  Catch the condition of such a trap and
verify the user-mode %cr3, which is saved by the page fault handler.

I picked up this trick from an article about the Linux implementation.

Reviewed by:	alc, markj (previous version)
Sponsored by:	The FreeBSD Foundation
MFC after:	12 days
Differential revision:	https://reviews.freebsd.org/D13956
This commit is contained in:
Konstantin Belousov 2018-01-19 22:10:29 +00:00
parent 1c8edc1ed8
commit b4dfc9d7ad
7 changed files with 56 additions and 11 deletions

View File

@ -287,24 +287,42 @@ IDTVEC(dblfault)
3: hlt
jmp 3b
PTI_ENTRY page, Xpage, has_err=1
ALIGN_TEXT
/*
 * PTI variant of the page fault entry point.
 *
 * Open-codes the swapgs/push sequence of PTI_UENTRY so that the %cr3
 * value that was active when the fault was taken can be recorded in
 * PCPU(SAVED_UCR3) before PTI_UUENTRY loads PCPU(KCR3) into %cr3.
 * The common code at page_u later copies the recorded value into the
 * pcb, where trap_pfault() compares it against the current pmap's %cr3.
 */
IDTVEC(page_pti)
/*
 * RPL bits of the saved %cs are zero when the fault came from the
 * kernel; in that case use the regular handler.
 * NOTE(review): the -2*8 adjustment presumably accounts for %rax and
 * %rdx not having been pushed yet at this point -- confirm against
 * the PTI_CS definition.
 */
testb $SEL_RPL_MASK,PTI_CS-2*8(%rsp)
jz Xpage
swapgs /* from usermode: make PCPU() accesses usable */
pushq %rax /* scratch registers, same as PTI_UENTRY */
pushq %rdx
movq %cr3,%rax /* record the %cr3 seen at fault time ... */
movq %rax,PCPU(SAVED_UCR3) /* ... before it is replaced below */
PTI_UUENTRY has_err=1
subq $TF_ERR,%rsp /* make room for the rest of the trap frame */
movq %rdi,TF_RDI(%rsp) /* free up the registers used by page_u */
movq %rax,TF_RAX(%rsp)
movq %rdx,TF_RDX(%rsp)
jmp page_u /* join the common usermode path; swapgs is already done */
IDTVEC(page)
subq $TF_ERR,%rsp
movq %rdi,TF_RDI(%rsp) /* free up a GP register */
movq %rdi,TF_RDI(%rsp) /* free up GP registers */
movq %rax,TF_RAX(%rsp)
movq %rdx,TF_RDX(%rsp)
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz page_cr2 /* already running with kernel GS.base */
swapgs
movq PCPU(CURPCB),%rdi
page_u: movq PCPU(CURPCB),%rdi
andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
movq PCPU(SAVED_UCR3),%rax
movq %rax,PCB_SAVED_UCR3(%rdi)
page_cr2:
movq %cr2,%rdi /* preserve %cr2 before .. */
movq %rdi,TF_ADDR(%rsp) /* enabling interrupts. */
SAVE_SEGS
movl $T_PAGEFLT,TF_TRAPNO(%rsp)
testl $PSL_I,TF_RFLAGS(%rsp)
jz alltraps_pushregs_no_rdi
jz alltraps_pushregs_no_rax
sti
jmp alltraps_pushregs_no_rdi
jmp alltraps_pushregs_no_rax
/*
* We have to special-case this one. If we get a trap in doreti() at

View File

@ -141,6 +141,7 @@ ASSYM(PCB_LDT, offsetof(struct pcb, pcb_ldt));
ASSYM(PCB_TR, offsetof(struct pcb, pcb_tr));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
ASSYM(PCB_SAVED_UCR3, offsetof(struct pcb, pcb_saved_ucr3));
ASSYM(PCB_TSSP, offsetof(struct pcb, pcb_tssp));
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
ASSYM(PCB_EFER, offsetof(struct pcb, pcb_efer));
@ -224,6 +225,7 @@ ASSYM(PC_TSS, offsetof(struct pcpu, pc_tss));
ASSYM(PC_PM_SAVE_CNT, offsetof(struct pcpu, pc_pm_save_cnt));
ASSYM(PC_KCR3, offsetof(struct pcpu, pc_kcr3));
ASSYM(PC_UCR3, offsetof(struct pcpu, pc_ucr3));
ASSYM(PC_SAVED_UCR3, offsetof(struct pcpu, pc_saved_ucr3));
ASSYM(PC_PTI_STACK, offsetof(struct pcpu, pc_pti_stack));
ASSYM(PC_PTI_STACK_SZ, PC_PTI_STACK_SZ);

View File

@ -2575,6 +2575,15 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
pml4 = &pmap->pm_pml4[pml4index];
*pml4 = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
if (pmap->pm_pml4u != NULL && pml4index < NUPML4E) {
/*
* PTI: Make all user-space mappings in the
* kernel-mode page table no-execute so that
* we detect any programming errors that leave
* the kernel-mode page table active on return
* to user space.
*/
*pml4 |= pg_nx;
pml4u = &pmap->pm_pml4u[pml4index];
*pml4u = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V |
PG_A | PG_M;

View File

@ -701,6 +701,17 @@ trap_pfault(struct trapframe *frame, int usermode)
return (-1);
}
/*
* If the nx protection of the usermode portion of the kernel page
* tables caused the trap, panic.
*/
if (pti && usermode && pg_nx != 0 && (frame->tf_err & (PGEX_P | PGEX_W |
PGEX_U | PGEX_I)) == (PGEX_P | PGEX_U | PGEX_I) &&
(curpcb->pcb_saved_ucr3 & ~(PMAP_PCID_OVERMAX - 1))==
(PCPU_GET(curpmap)->pm_cr3 & ~(PMAP_PCID_OVERMAX - 1)))
panic("PTI: pid %d comm %s tf_err %#lx\n", p->p_pid,
p->p_comm, frame->tf_err);
/*
* PGEX_I is defined only if the execute disable bit capability is
* supported and enabled.

View File

@ -183,10 +183,7 @@
.endr
.endm
.macro PTI_UENTRY has_err
swapgs
pushq %rax
pushq %rdx
.macro PTI_UUENTRY has_err
movq PCPU(KCR3),%rax
movq %rax,%cr3
movq PCPU(RSP0),%rax
@ -197,6 +194,13 @@
popq %rax
.endm
.macro PTI_UENTRY has_err
swapgs
pushq %rax
pushq %rdx
PTI_UUENTRY \has_err
.endm
.macro PTI_ENTRY name, cont, has_err=0
ALIGN_TEXT
.globl X\name\()_pti

View File

@ -92,7 +92,7 @@ struct pcb {
/* copyin/out fault recovery */
caddr_t pcb_onfault;
uint64_t pcb_pad0;
uint64_t pcb_saved_ucr3;
/* local tss, with i/o bitmap; NULL for common */
struct amd64tss *pcb_tssp;

View File

@ -51,6 +51,7 @@
struct amd64tss *pc_commontssp;/* Common TSS for the CPU */ \
uint64_t pc_kcr3; \
uint64_t pc_ucr3; \
uint64_t pc_saved_ucr3; \
register_t pc_rsp0; \
register_t pc_scratch_rsp; /* User %rsp in syscall */ \
register_t pc_scratch_rax; \
@ -73,7 +74,7 @@
uint32_t pc_pcid_next; \
uint32_t pc_pcid_gen; \
uint32_t pc_smp_tlb_done; /* TLB op acknowledgement */ \
char __pad[232] /* be divisor of PAGE_SIZE \
char __pad[224] /* be divisor of PAGE_SIZE \
after cache alignment */
#define PC_DBREG_CMD_NONE 0