PTI: Trap if we returned to userspace with kernel (full) page table
still active. Map the userspace portion of the VA in the PTI kernel-mode page table as non-executable. This way, if we ever miss reloading ucr3 into %cr3 on the return to usermode, the process traps instead of executing in a potentially vulnerable setup. Catch the condition of such a trap and verify the user-mode %cr3, which is saved by the page fault handler. I picked up this trick from some article about the Linux implementation. Reviewed by: alc, markj (previous version) Sponsored by: The FreeBSD Foundation MFC after: 12 days Differential revision: https://reviews.freebsd.org/D13956
This commit is contained in:
parent
1c8edc1ed8
commit
b4dfc9d7ad
@ -287,24 +287,42 @@ IDTVEC(dblfault)
|
||||
3: hlt
|
||||
jmp 3b
|
||||
|
||||
PTI_ENTRY page, Xpage, has_err=1
|
||||
ALIGN_TEXT
|
||||
IDTVEC(page_pti)
|
||||
testb $SEL_RPL_MASK,PTI_CS-2*8(%rsp)
|
||||
jz Xpage
|
||||
swapgs
|
||||
pushq %rax
|
||||
pushq %rdx
|
||||
movq %cr3,%rax
|
||||
movq %rax,PCPU(SAVED_UCR3)
|
||||
PTI_UUENTRY has_err=1
|
||||
subq $TF_ERR,%rsp
|
||||
movq %rdi,TF_RDI(%rsp)
|
||||
movq %rax,TF_RAX(%rsp)
|
||||
movq %rdx,TF_RDX(%rsp)
|
||||
jmp page_u
|
||||
IDTVEC(page)
|
||||
subq $TF_ERR,%rsp
|
||||
movq %rdi,TF_RDI(%rsp) /* free up a GP register */
|
||||
movq %rdi,TF_RDI(%rsp) /* free up GP registers */
|
||||
movq %rax,TF_RAX(%rsp)
|
||||
movq %rdx,TF_RDX(%rsp)
|
||||
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
|
||||
jz page_cr2 /* already running with kernel GS.base */
|
||||
swapgs
|
||||
movq PCPU(CURPCB),%rdi
|
||||
page_u: movq PCPU(CURPCB),%rdi
|
||||
andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
|
||||
movq PCPU(SAVED_UCR3),%rax
|
||||
movq %rax,PCB_SAVED_UCR3(%rdi)
|
||||
page_cr2:
|
||||
movq %cr2,%rdi /* preserve %cr2 before .. */
|
||||
movq %rdi,TF_ADDR(%rsp) /* enabling interrupts. */
|
||||
SAVE_SEGS
|
||||
movl $T_PAGEFLT,TF_TRAPNO(%rsp)
|
||||
testl $PSL_I,TF_RFLAGS(%rsp)
|
||||
jz alltraps_pushregs_no_rdi
|
||||
jz alltraps_pushregs_no_rax
|
||||
sti
|
||||
jmp alltraps_pushregs_no_rdi
|
||||
jmp alltraps_pushregs_no_rax
|
||||
|
||||
/*
|
||||
* We have to special-case this one. If we get a trap in doreti() at
|
||||
|
@ -141,6 +141,7 @@ ASSYM(PCB_LDT, offsetof(struct pcb, pcb_ldt));
|
||||
ASSYM(PCB_TR, offsetof(struct pcb, pcb_tr));
|
||||
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
|
||||
ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
|
||||
ASSYM(PCB_SAVED_UCR3, offsetof(struct pcb, pcb_saved_ucr3));
|
||||
ASSYM(PCB_TSSP, offsetof(struct pcb, pcb_tssp));
|
||||
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
|
||||
ASSYM(PCB_EFER, offsetof(struct pcb, pcb_efer));
|
||||
@ -224,6 +225,7 @@ ASSYM(PC_TSS, offsetof(struct pcpu, pc_tss));
|
||||
ASSYM(PC_PM_SAVE_CNT, offsetof(struct pcpu, pc_pm_save_cnt));
|
||||
ASSYM(PC_KCR3, offsetof(struct pcpu, pc_kcr3));
|
||||
ASSYM(PC_UCR3, offsetof(struct pcpu, pc_ucr3));
|
||||
ASSYM(PC_SAVED_UCR3, offsetof(struct pcpu, pc_saved_ucr3));
|
||||
ASSYM(PC_PTI_STACK, offsetof(struct pcpu, pc_pti_stack));
|
||||
ASSYM(PC_PTI_STACK_SZ, PC_PTI_STACK_SZ);
|
||||
|
||||
|
@ -2575,6 +2575,15 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
|
||||
pml4 = &pmap->pm_pml4[pml4index];
|
||||
*pml4 = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
|
||||
if (pmap->pm_pml4u != NULL && pml4index < NUPML4E) {
|
||||
/*
|
||||
* PTI: Make all user-space mappings in the
|
||||
* kernel-mode page table no-execute so that
|
||||
* we detect any programming errors that leave
|
||||
* the kernel-mode page table active on return
|
||||
* to user space.
|
||||
*/
|
||||
*pml4 |= pg_nx;
|
||||
|
||||
pml4u = &pmap->pm_pml4u[pml4index];
|
||||
*pml4u = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V |
|
||||
PG_A | PG_M;
|
||||
|
@ -701,6 +701,17 @@ trap_pfault(struct trapframe *frame, int usermode)
|
||||
return (-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* If nx protection of the usermode portion of kernel page
|
||||
* tables caused trap, panic.
|
||||
*/
|
||||
if (pti && usermode && pg_nx != 0 && (frame->tf_err & (PGEX_P | PGEX_W |
|
||||
PGEX_U | PGEX_I)) == (PGEX_P | PGEX_U | PGEX_I) &&
|
||||
(curpcb->pcb_saved_ucr3 & ~(PMAP_PCID_OVERMAX - 1))==
|
||||
(PCPU_GET(curpmap)->pm_cr3 & ~(PMAP_PCID_OVERMAX - 1)))
|
||||
panic("PTI: pid %d comm %s tf_err %#lx\n", p->p_pid,
|
||||
p->p_comm, frame->tf_err);
|
||||
|
||||
/*
|
||||
* PGEX_I is defined only if the execute disable bit capability is
|
||||
* supported and enabled.
|
||||
|
@ -183,10 +183,7 @@
|
||||
.endr
|
||||
.endm
|
||||
|
||||
.macro PTI_UENTRY has_err
|
||||
swapgs
|
||||
pushq %rax
|
||||
pushq %rdx
|
||||
.macro PTI_UUENTRY has_err
|
||||
movq PCPU(KCR3),%rax
|
||||
movq %rax,%cr3
|
||||
movq PCPU(RSP0),%rax
|
||||
@ -197,6 +194,13 @@
|
||||
popq %rax
|
||||
.endm
|
||||
|
||||
.macro PTI_UENTRY has_err
|
||||
swapgs
|
||||
pushq %rax
|
||||
pushq %rdx
|
||||
PTI_UUENTRY \has_err
|
||||
.endm
|
||||
|
||||
.macro PTI_ENTRY name, cont, has_err=0
|
||||
ALIGN_TEXT
|
||||
.globl X\name\()_pti
|
||||
|
@ -92,7 +92,7 @@ struct pcb {
|
||||
/* copyin/out fault recovery */
|
||||
caddr_t pcb_onfault;
|
||||
|
||||
uint64_t pcb_pad0;
|
||||
uint64_t pcb_saved_ucr3;
|
||||
|
||||
/* local tss, with i/o bitmap; NULL for common */
|
||||
struct amd64tss *pcb_tssp;
|
||||
|
@ -51,6 +51,7 @@
|
||||
struct amd64tss *pc_commontssp;/* Common TSS for the CPU */ \
|
||||
uint64_t pc_kcr3; \
|
||||
uint64_t pc_ucr3; \
|
||||
uint64_t pc_saved_ucr3; \
|
||||
register_t pc_rsp0; \
|
||||
register_t pc_scratch_rsp; /* User %rsp in syscall */ \
|
||||
register_t pc_scratch_rax; \
|
||||
@ -73,7 +74,7 @@
|
||||
uint32_t pc_pcid_next; \
|
||||
uint32_t pc_pcid_gen; \
|
||||
uint32_t pc_smp_tlb_done; /* TLB op acknowledgement */ \
|
||||
char __pad[232] /* be divisor of PAGE_SIZE \
|
||||
char __pad[224] /* be divisor of PAGE_SIZE \
|
||||
after cache alignment */
|
||||
|
||||
#define PC_DBREG_CMD_NONE 0
|
||||
|
Loading…
x
Reference in New Issue
Block a user