amd64: Add a knob to flush RSB on context switches if machine has SMEP.

The flush is needed to prevent cross-process ret2spec, which is not handled
on kernel entry when SMEP is present, even if IBPB is enabled.
While there, add i386 RSB flush.

Reported by:	Anthony Steinhauser <asteinhauser@google.com>
Reviewed by:	markj, Anthony Steinhauser
Discussed with:	philip
admbugs:	961
Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
Author:	Konstantin Belousov
Date:	2020-05-20 22:00:31 +00:00
Parent:	63939df8bc
Commit:	ea6020830c

7 changed files with 65 additions and 14 deletions

share/man/man7/security.7

@@ -28,7 +28,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd February 4, 2020
+.Dd May 16, 2020
 .Dt SECURITY 7
 .Os
 .Sh NAME
@@ -992,6 +992,13 @@ See also
 .Xr proccontrol 1
 mode
 .Dv kpti .
+.It Dv machdep.mitigations.flush_rsb_ctxsw
+amd64.
+Controls Return Stack Buffer flush on context switch, to prevent
+cross-process ret2spec attacks.
+Only needed, and only enabled by default, if the machine
+supports SMEP, otherwise IBRS would do necessary flushing on kernel
+entry anyway.
 .It Dv hw.mds_disable
 amd64 and i386.
 Controls Microarchitectural Data Sampling hardware information leak
sys/amd64/amd64/cpu_switch.S

@@ -221,6 +221,8 @@ done_load_dr:
 	movq	%rax,(%rsp)
 	movq	PCPU(CURTHREAD),%rdi
 	call	fpu_activate_sw
+	cmpb	$0,cpu_flush_rsb_ctxsw(%rip)
+	jne	rsb_flush
 	ret
 
 /*

sys/amd64/amd64/initcpu.c

@@ -238,12 +238,24 @@ initializecpu(void)
 		cr4 |= CR4_PKE;
 
 	/*
+	 * If SMEP is present, we only need to flush RSB (by default)
+	 * on context switches, to prevent cross-process ret2spec
+	 * attacks.  Do it automatically if ibrs_disable is set, to
+	 * complete the mitigation.
+	 *
 	 * Postpone enabling the SMEP on the boot CPU until the page
 	 * tables are switched from the boot loader identity mapping
 	 * to the kernel tables.  The boot loader enables the U bit in
 	 * its tables.
 	 */
-	if (!IS_BSP()) {
+	if (IS_BSP()) {
+		if (cpu_stdext_feature & CPUID_STDEXT_SMEP &&
+		    !TUNABLE_INT_FETCH(
+		    "machdep.mitigations.cpu_flush_rsb_ctxsw",
+		    &cpu_flush_rsb_ctxsw) &&
+		    hw_ibrs_disable)
+			cpu_flush_rsb_ctxsw = 1;
+	} else {
 		if (cpu_stdext_feature & CPUID_STDEXT_SMEP)
 			cr4 |= CR4_SMEP;
 		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
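
Also not part of the commit: a small userland sketch, using kenv(2), to check
whether the loader tunable consulted above was preset by the administrator.
Note that the tunable fetched in initcpu.c is spelled
machdep.mitigations.cpu_flush_rsb_ctxsw, while the sysctl created in
cpu_machdep.c is machdep.mitigations.flush_rsb_ctxsw.

/* Sketch only: was the boot-time tunable preset? */
#include <kenv.h>
#include <stdio.h>

int
main(void)
{
	char val[8];

	if (kenv(KENV_GET, "machdep.mitigations.cpu_flush_rsb_ctxsw",
	    val, sizeof(val)) == -1)
		printf("tunable not set; initializecpu() applies the "
		    "SMEP + ibrs_disable default\n");
	else
		printf("tunable preset to %s\n", val);
	return (0);
}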

sys/amd64/amd64/support.S

@@ -1613,23 +1613,27 @@ ENTRY(pmap_pti_pcid_invlrng)
 	retq
 
 .altmacro
-.macro	ibrs_seq_label l
-handle_ibrs_\l:
+.macro	rsb_seq_label l
+rsb_seq_\l:
 .endm
-.macro	ibrs_call_label l
-	call	handle_ibrs_\l
+.macro	rsb_call_label l
+	call	rsb_seq_\l
 .endm
-.macro	ibrs_seq count
+.macro	rsb_seq count
 	ll=1
 .rept	\count
-	ibrs_call_label	%(ll)
+	rsb_call_label	%(ll)
 	nop
-	ibrs_seq_label %(ll)
+	rsb_seq_label %(ll)
 	addq	$8,%rsp
 	ll=ll+1
 .endr
 .endm
+
+ENTRY(rsb_flush)
+	rsb_seq	32
+	ret
 
 /* all callers already saved %rax, %rdx, and %rcx */
 ENTRY(handle_ibrs_entry)
 	cmpb	$0,hw_ibrs_ibpb_active(%rip)
@@ -1641,8 +1645,7 @@ ENTRY(handle_ibrs_entry)
 	wrmsr
 	movb	$1,PCPU(IBPB_SET)
 	testl	$CPUID_STDEXT_SMEP,cpu_stdext_feature(%rip)
-	jne	1f
-	ibrs_seq	32
+	je	rsb_flush
 1:	ret
 END(handle_ibrs_entry)

sys/i386/i386/support.s

@@ -445,6 +445,28 @@ msr_onfault:
 	movl	$EFAULT,%eax
 	ret
 
+.altmacro
+.macro	rsb_seq_label l
+rsb_seq_\l:
+.endm
+.macro	rsb_call_label l
+	call	rsb_seq_\l
+.endm
+.macro	rsb_seq count
+	ll=1
+.rept	\count
+	rsb_call_label	%(ll)
+	nop
+	rsb_seq_label %(ll)
+	addl	$4,%esp
+	ll=ll+1
+.endr
+.endm
+
+ENTRY(rsb_flush)
+	rsb_seq	32
+	ret
+
 ENTRY(handle_ibrs_entry)
 	cmpb	$0,hw_ibrs_ibpb_active
 	je	1f
@@ -455,10 +477,9 @@ ENTRY(handle_ibrs_entry)
 	wrmsr
 	movb	$1,PCPU(IBPB_SET)
 	/*
-	 * i386 does not implement SMEP, but the 4/4 split makes this not
-	 * that important.
+	 * i386 does not implement SMEP.
 	 */
-1:	ret
+1:	jmp	rsb_flush
 END(handle_ibrs_entry)
 
 ENTRY(handle_ibrs_exit)

sys/x86/include/x86_var.h

@@ -94,6 +94,7 @@ extern int hw_ibrs_ibpb_active;
 extern int hw_mds_disable;
 extern int hw_ssb_active;
 extern int x86_taa_enable;
+extern int cpu_flush_rsb_ctxsw;
 
 struct pcb;
 struct thread;

sys/x86/x86/cpu_machdep.c

@@ -1397,6 +1397,11 @@ SYSCTL_PROC(_machdep_mitigations_taa, OID_AUTO, state,
     sysctl_taa_state_handler, "A",
     "TAA Mitigation state");
 
+int __read_frequently cpu_flush_rsb_ctxsw;
+SYSCTL_INT(_machdep_mitigations, OID_AUTO, flush_rsb_ctxsw,
+    CTLFLAG_RW | CTLFLAG_NOFETCH, &cpu_flush_rsb_ctxsw, 0,
+    "Flush Return Stack Buffer on context switch");
+
 /*
  * Enable and restore kernel text write permissions.
  * Callers must ensure that disable_wp()/restore_wp() are executed