IBRS support, AKA Spectre hardware mitigation.

It is coded according to the Intel document 336996-001, reading of the
patches posted on lkml, and some additional consultations with Intel.

For existing processors, you need a microcode update which adds the
IBRS CPU features, and you must manually enable the mitigation by
setting the tunable/sysctl hw.ibrs_disable to 0.  The current status
can be checked with the sysctl hw.ibrs_active.  The mitigation might be
inactive if the CPU feature is not patched in, or if the CPU reports,
via the IA32_ARCH_CAP_IBRS_ALL bit, that IBRS use is not required.

Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
Differential revision:	https://reviews.freebsd.org/D14029
This commit is contained in:
Konstantin Belousov 2018-01-31 14:36:27 +00:00
parent 3b5319325e
commit 319117fd57
13 changed files with 217 additions and 31 deletions

View File

@ -171,21 +171,22 @@ X\l:
alltraps:
movq %rdi,TF_RDI(%rsp)
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz alltraps_segs /* already running with kernel GS.base */
jz 1f /* already running with kernel GS.base */
swapgs
movq PCPU(CURPCB),%rdi
andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
alltraps_segs:
SAVE_SEGS
testl $PSL_I,TF_RFLAGS(%rsp)
jz alltraps_pushregs_no_rdi
sti
alltraps_pushregs_no_rdi:
1: SAVE_SEGS
movq %rdx,TF_RDX(%rsp)
movq %rax,TF_RAX(%rsp)
movq %rcx,TF_RCX(%rsp)
testb $SEL_RPL_MASK,TF_CS(%rsp)
jz 2f
call handle_ibrs_entry
2: testl $PSL_I,TF_RFLAGS(%rsp)
jz alltraps_pushregs_no_rax
sti
alltraps_pushregs_no_rax:
movq %rsi,TF_RSI(%rsp)
movq %rcx,TF_RCX(%rsp)
movq %r8,TF_R8(%rsp)
movq %r9,TF_R9(%rsp)
movq %rbx,TF_RBX(%rsp)
@ -243,13 +244,18 @@ calltrap:
alltraps_noen:
movq %rdi,TF_RDI(%rsp)
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz alltraps_noen_segs /* already running with kernel GS.base */
jz 1f /* already running with kernel GS.base */
swapgs
movq PCPU(CURPCB),%rdi
andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
alltraps_noen_segs:
SAVE_SEGS
jmp alltraps_pushregs_no_rdi
1: SAVE_SEGS
movq %rdx,TF_RDX(%rsp)
movq %rax,TF_RAX(%rsp)
movq %rcx,TF_RCX(%rsp)
testb $SEL_RPL_MASK,TF_CS(%rsp)
jz alltraps_pushregs_no_rax
call handle_ibrs_entry
jmp alltraps_pushregs_no_rax
IDTVEC(dblfault)
subq $TF_ERR,%rsp
@ -301,12 +307,14 @@ IDTVEC(page_pti)
movq %rdi,TF_RDI(%rsp)
movq %rax,TF_RAX(%rsp)
movq %rdx,TF_RDX(%rsp)
movq %rcx,TF_RCX(%rsp)
jmp page_u
IDTVEC(page)
subq $TF_ERR,%rsp
movq %rdi,TF_RDI(%rsp) /* free up GP registers */
movq %rax,TF_RAX(%rsp)
movq %rdx,TF_RDX(%rsp)
movq %rcx,TF_RCX(%rsp)
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz page_cr2 /* already running with kernel GS.base */
swapgs
@ -314,6 +322,7 @@ page_u: movq PCPU(CURPCB),%rdi
andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
movq PCPU(SAVED_UCR3),%rax
movq %rax,PCB_SAVED_UCR3(%rdi)
call handle_ibrs_entry
page_cr2:
movq %cr2,%rdi /* preserve %cr2 before .. */
movq %rdi,TF_ADDR(%rsp) /* enabling interrupts. */
@ -371,6 +380,7 @@ prot_addrf:
movq %rdi,TF_RDI(%rsp) /* free up a GP register */
movq %rax,TF_RAX(%rsp)
movq %rdx,TF_RDX(%rsp)
movq %rcx,TF_RCX(%rsp)
movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
leaq doreti_iret(%rip),%rdi
@ -396,7 +406,8 @@ prot_addrf:
3: cmpw $KUG32SEL,TF_GS(%rsp)
jne 4f
movq %rdx,PCB_GSBASE(%rdi)
4: orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* always full iret from GPF */
4: call handle_ibrs_entry
orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* always full iret from GPF */
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
testl $PSL_I,TF_RFLAGS(%rsp)
@ -440,7 +451,9 @@ fast_syscall_common:
movq %r11,TF_RSP(%rsp) /* user stack pointer */
movq PCPU(SCRATCH_RAX),%rax
movq %rax,TF_RAX(%rsp) /* syscall number */
movq %rdx,TF_RDX(%rsp) /* arg 3 */
SAVE_SEGS
call handle_ibrs_entry
movq PCPU(CURPCB),%r11
andl $~PCB_FULL_IRET,PCB_FLAGS(%r11)
sti
@ -449,7 +462,6 @@ fast_syscall_common:
movq $2,TF_ERR(%rsp)
movq %rdi,TF_RDI(%rsp) /* arg 1 */
movq %rsi,TF_RSI(%rsp) /* arg 2 */
movq %rdx,TF_RDX(%rsp) /* arg 3 */
movq %r10,TF_RCX(%rsp) /* arg 4 */
movq %r8,TF_R8(%rsp) /* arg 5 */
movq %r9,TF_R9(%rsp) /* arg 6 */
@ -475,6 +487,7 @@ fast_syscall_common:
movq PCPU(CURTHREAD),%rax
testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
jne 3f
call handle_ibrs_exit
/* Restore preserved registers. */
MEXITCOUNT
movq TF_RDI(%rsp),%rdi /* bonus; preserve arg 1 */
@ -561,8 +574,8 @@ IDTVEC(nmi)
testb $SEL_RPL_MASK,TF_CS(%rsp)
jnz nmi_fromuserspace
/*
* We've interrupted the kernel. Preserve GS.base in %r12
* and %cr3 in %r13.
* We've interrupted the kernel. Preserve GS.base in %r12,
* %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTRL in %r14d.
*/
movl $MSR_GSBASE,%ecx
rdmsr
@ -577,8 +590,14 @@ IDTVEC(nmi)
movq %cr3,%r13
movq PCPU(KCR3),%rax
cmpq $~0,%rax
je nmi_calltrap
je 1f
movq %rax,%cr3
1: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
je nmi_calltrap
movl $MSR_IA32_SPEC_CTRL,%ecx
rdmsr
movl %eax,%r14d
call handle_ibrs_entry
jmp nmi_calltrap
nmi_fromuserspace:
incl %ebx
@ -588,7 +607,8 @@ nmi_fromuserspace:
cmpq $~0,%rax
je 1f
movq %rax,%cr3
1: movq PCPU(CURPCB),%rdi
1: call handle_ibrs_entry
movq PCPU(CURPCB),%rdi
testq %rdi,%rdi
jz 3f
orl $PCB_FULL_IRET,PCB_FLAGS(%rdi)
@ -682,10 +702,19 @@ nocallchain:
#endif
testl %ebx,%ebx /* %ebx == 0 => return to userland */
jnz doreti_exit
/*
* Restore speculation control MSR, if preserved.
*/
testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
je 1f
movl %r14d,%eax
xorl %edx,%edx
movl $MSR_IA32_SPEC_CTRL,%ecx
wrmsr
/*
* Put back the preserved MSR_GSBASE value.
*/
movl $MSR_GSBASE,%ecx
1: movl $MSR_GSBASE,%ecx
movq %r12,%rdx
movl %edx,%eax
shrq $32,%rdx
@ -743,8 +772,8 @@ IDTVEC(mchk)
testb $SEL_RPL_MASK,TF_CS(%rsp)
jnz mchk_fromuserspace
/*
* We've interrupted the kernel. Preserve GS.base in %r12
* and %cr3 in %r13.
* We've interrupted the kernel. Preserve GS.base in %r12,
* %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTRL in %r14d.
*/
movl $MSR_GSBASE,%ecx
rdmsr
@ -759,8 +788,14 @@ IDTVEC(mchk)
movq %cr3,%r13
movq PCPU(KCR3),%rax
cmpq $~0,%rax
je mchk_calltrap
je 1f
movq %rax,%cr3
1: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
je mchk_calltrap
movl $MSR_IA32_SPEC_CTRL,%ecx
rdmsr
movl %eax,%r14d
call handle_ibrs_entry
jmp mchk_calltrap
mchk_fromuserspace:
incl %ebx
@ -770,7 +805,7 @@ mchk_fromuserspace:
cmpq $~0,%rax
je 1f
movq %rax,%cr3
1:
1: call handle_ibrs_entry
/* Note: this label is also used by ddb and gdb: */
mchk_calltrap:
FAKE_MCOUNT(TF_RIP(%rsp))
@ -779,10 +814,19 @@ mchk_calltrap:
MEXITCOUNT
testl %ebx,%ebx /* %ebx == 0 => return to userland */
jnz doreti_exit
/*
* Restore speculation control MSR, if preserved.
*/
testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
je 1f
movl %r14d,%eax
xorl %edx,%edx
movl $MSR_IA32_SPEC_CTRL,%ecx
wrmsr
/*
* Put back the preserved MSR_GSBASE value.
*/
movl $MSR_GSBASE,%ecx
1: movl $MSR_GSBASE,%ecx
movq %r12,%rdx
movl %edx,%eax
shrq $32,%rdx
@ -960,6 +1004,7 @@ ld_regs:
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz 2f /* keep running with kernel GS.base */
cli
call handle_ibrs_exit_rs
cmpb $0,pti
je 1f
pushq %rdx
@ -1011,6 +1056,10 @@ set_segs:
.globl doreti_iret_fault
doreti_iret_fault:
subq $TF_RIP,%rsp /* space including tf_err, tf_trapno */
movq %rax,TF_RAX(%rsp)
movq %rdx,TF_RDX(%rsp)
movq %rcx,TF_RCX(%rsp)
call handle_ibrs_entry
testb $SEL_RPL_MASK,TF_CS(%rsp)
jz 1f
sti
@ -1019,11 +1068,8 @@ doreti_iret_fault:
movl $TF_HASSEGS,TF_FLAGS(%rsp)
movq %rdi,TF_RDI(%rsp)
movq %rsi,TF_RSI(%rsp)
movq %rdx,TF_RDX(%rsp)
movq %rcx,TF_RCX(%rsp)
movq %r8,TF_R8(%rsp)
movq %r9,TF_R9(%rsp)
movq %rax,TF_RAX(%rsp)
movq %rbx,TF_RBX(%rsp)
movq %rbp,TF_RBP(%rsp)
movq %r10,TF_R10(%rsp)

View File

@ -228,6 +228,7 @@ ASSYM(PC_UCR3, offsetof(struct pcpu, pc_ucr3));
ASSYM(PC_SAVED_UCR3, offsetof(struct pcpu, pc_saved_ucr3));
ASSYM(PC_PTI_STACK, offsetof(struct pcpu, pc_pti_stack));
ASSYM(PC_PTI_STACK_SZ, PC_PTI_STACK_SZ);
ASSYM(PC_IBPB_SET, offsetof(struct pcpu, pc_ibpb_set));
ASSYM(LA_EOI, LAPIC_EOI * LAPIC_MEM_MUL);
ASSYM(LA_ISR, LAPIC_ISR0 * LAPIC_MEM_MUL);

View File

@ -223,6 +223,7 @@ initializecpu(void)
wrmsr(MSR_EFER, msr);
pg_nx = PG_NX;
}
hw_ibrs_recalculate();
switch (cpu_vendor_id) {
case CPU_VENDOR_AMD:
init_amd();

View File

@ -1826,6 +1826,8 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
#endif
thread0.td_critnest = 0;
TUNABLE_INT_FETCH("hw.ibrs_disable", &hw_ibrs_disable);
TSEXIT();
/* Location of kernel stack for locore */

View File

@ -33,6 +33,7 @@
#include "opt_ddb.h"
#include <machine/asmacros.h>
#include <machine/specialreg.h>
#include <machine/pmap.h>
#include "assym.s"
@ -850,3 +851,67 @@ ENTRY(pmap_pti_pcid_invlrng)
movq %rsi,%cr3 /* back to kernel */
popfq
retq
/*
 * Helper macros used by handle_ibrs_entry.  Alternate macro mode is
 * switched on so that a %(expr) argument is evaluated to its numeric
 * value before substitution, which lets ibrs_seq generate a distinct
 * label for every iteration.
 */
.altmacro
/* Define the label handle_ibrs_<l>. */
.macro ibrs_seq_label l
handle_ibrs_\l:
.endm
/* Emit a call to the label handle_ibrs_<l>. */
.macro ibrs_call_label l
call handle_ibrs_\l
.endm
/*
 * Emit <count> copies of the sequence
 *	call handle_ibrs_N; nop; handle_ibrs_N: addq $8,%rsp
 * with N counting up from 1.  Each call targets the label placed
 * right after the following nop, and the addq drops the return
 * address pushed by that call, so %rsp is unchanged once the whole
 * sequence has run.
 * NOTE(review): presumably the purpose is to overwrite the CPU
 * return-address predictor entries; confirm against the Intel
 * document.
 */
.macro ibrs_seq count
ll=1
.rept \count
ibrs_call_label %(ll)
nop
ibrs_seq_label %(ll)
addq $8,%rsp
ll=ll+1
.endr
.endm
/*
 * Entry hook.  Does nothing when hw_ibrs_active is zero.  Otherwise
 * writes IA32_SPEC_CTRL_IBRS to MSR_IA32_SPEC_CTRL, sets the per-CPU
 * IBPB_SET byte to record that the MSR was written, and, if the SMEP
 * bit is not set in cpu_stdext_feature, runs the 32-iteration
 * ibrs_seq call sequence.
 * Clobbers %eax, %ecx and %edx, and the flags.
 */
/* all callers already saved %rax, %rdx, and %rcx */
ENTRY(handle_ibrs_entry)
cmpb $0,hw_ibrs_active(%rip)
je 1f
/* wrmsr operands: MSR index in %ecx, 64-bit value in %edx:%eax. */
movl $MSR_IA32_SPEC_CTRL,%ecx
movl $IA32_SPEC_CTRL_IBRS,%eax
movl $IA32_SPEC_CTRL_IBRS>>32,%edx
wrmsr
/* Remember the write so that the exit hooks know to undo it. */
movb $1,PCPU(IBPB_SET)
/* Skip the call sequence when the SMEP feature bit is present. */
testl $CPUID_STDEXT_SMEP,cpu_stdext_feature(%rip)
jne 1f
ibrs_seq 32
1: ret
END(handle_ibrs_entry)
/*
 * Exit hook.  If the per-CPU IBPB_SET byte is non-zero, writes zero
 * to the whole MSR_IA32_SPEC_CTRL register and clears IBPB_SET;
 * otherwise returns without touching the MSR.
 * Clobbers %eax, %ecx and %edx, and the flags.
 */
ENTRY(handle_ibrs_exit)
cmpb $0,PCPU(IBPB_SET)
je 1f
movl $MSR_IA32_SPEC_CTRL,%ecx
/* %edx:%eax = 0, the value written to the MSR. */
xorl %eax,%eax
xorl %edx,%edx
wrmsr
movb $0,PCPU(IBPB_SET)
1: ret
END(handle_ibrs_exit)
/*
 * Same operation as handle_ibrs_exit (zero MSR_IA32_SPEC_CTRL and
 * clear the per-CPU IBPB_SET byte when it is set), but %rax, %rdx and
 * %rcx are saved on the stack around the wrmsr and restored
 * afterwards, so only the flags are modified.
 */
/* registers-neutral version, but needs stack */
ENTRY(handle_ibrs_exit_rs)
cmpb $0,PCPU(IBPB_SET)
je 1f
pushq %rax
pushq %rdx
pushq %rcx
movl $MSR_IA32_SPEC_CTRL,%ecx
xorl %eax,%eax
xorl %edx,%edx
wrmsr
/* Restore in the reverse order of the pushes. */
popq %rcx
popq %rdx
popq %rax
movb $0,PCPU(IBPB_SET)
1: ret
END(handle_ibrs_exit_rs)
/* Leave alternate macro mode, enabled for the ibrs_seq helpers. */
.noaltmacro

View File

@ -53,13 +53,14 @@ int0x80_syscall_common:
movq PCPU(CURPCB),%rdi
andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
SAVE_SEGS
sti
movq %rsi,TF_RSI(%rsp)
movq %rax,TF_RAX(%rsp)
movq %rdx,TF_RDX(%rsp)
movq %rcx,TF_RCX(%rsp)
call handle_ibrs_entry
sti
movq %rsi,TF_RSI(%rsp)
movq %r8,TF_R8(%rsp)
movq %r9,TF_R9(%rsp)
movq %rax,TF_RAX(%rsp)
movq %rbx,TF_RBX(%rsp)
movq %rbp,TF_RBP(%rsp)
movq %r10,TF_R10(%rsp)

View File

@ -38,6 +38,7 @@
extern uint64_t *vm_page_dump;
extern int hw_lower_amd64_sharedpage;
extern int hw_ibrs_disable;
/*
* The file "conf/ldscript.amd64" defines the symbol "kernphys". Its

View File

@ -74,7 +74,8 @@
uint32_t pc_pcid_next; \
uint32_t pc_pcid_gen; \
uint32_t pc_smp_tlb_done; /* TLB op acknowledgement */ \
char __pad[224] /* be divisor of PAGE_SIZE \
uint32_t pc_ibpb_set; \
char __pad[216] /* be divisor of PAGE_SIZE \
after cache alignment */
#define PC_DBREG_CMD_NONE 0

View File

@ -527,6 +527,7 @@ cpuctl_do_eval_cpu_features(int cpu, struct thread *td)
set_cpu(cpu, td);
identify_cpu1();
identify_cpu2();
hw_ibrs_recalculate();
restore_cpu(oldcpu, is_bound, td);
printcpuinfo();
return (0);

View File

@ -827,3 +827,11 @@ msr_onfault:
movl $0,PCB_ONFAULT(%ecx)
movl $EFAULT,%eax
ret
/*
 * Stub version of the IBRS entry hook: returns immediately without
 * doing any speculation-control work.
 */
ENTRY(handle_ibrs_entry)
ret
END(handle_ibrs_entry)
/*
 * Stub version of the IBRS exit hook: returns immediately without
 * doing any speculation-control work.
 */
ENTRY(handle_ibrs_exit)
ret
END(handle_ibrs_exit)

View File

@ -697,6 +697,10 @@
#define IA32_MISC_EN_xTPRD 0x0000000000800000ULL
#define IA32_MISC_EN_XDD 0x0000000400000000ULL
/*
* IA32_SPEC_CTRL and IA32_PRED_CMD MSRs are described in the Intel
* document 336996-001 Speculative Execution Side Channel Mitigations.
*/
/* MSR IA32_SPEC_CTRL */
#define IA32_SPEC_CTRL_IBRS 0x0000000000000001ULL
#define IA32_SPEC_CTRL_STIBP 0x0000000000000002ULL

View File

@ -131,6 +131,9 @@ bool fix_cpuid(void);
void fillw(int /*u_short*/ pat, void *base, size_t cnt);
int is_physical_memory(vm_paddr_t addr);
int isa_nmi(int cd);
void handle_ibrs_entry(void);
void handle_ibrs_exit(void);
void hw_ibrs_recalculate(void);
void nmi_call_kdb(u_int cpu, u_int type, struct trapframe *frame);
void nmi_call_kdb_smp(u_int type, struct trapframe *frame);
void nmi_handle_intr(u_int type, struct trapframe *frame);

View File

@ -142,6 +142,12 @@ acpi_cpu_idle_mwait(uint32_t mwait_hint)
int *state;
/*
* A comment in Linux patch claims that 'CPUs run faster with
* speculation protection disabled. All CPU threads in a core
* must disable speculation protection for it to be
* disabled. Disable it while we are idle so the other
* hyperthread can run fast.'
*
* XXXKIB. Software coordination mode should be supported,
* but all Intel CPUs provide hardware coordination.
*/
@ -150,9 +156,11 @@ acpi_cpu_idle_mwait(uint32_t mwait_hint)
KASSERT(*state == STATE_SLEEPING,
("cpu_mwait_cx: wrong monitorbuf state"));
*state = STATE_MWAIT;
handle_ibrs_entry();
cpu_monitor(state, 0, 0);
if (*state == STATE_MWAIT)
cpu_mwait(MWAIT_INTRBREAK, mwait_hint);
handle_ibrs_exit();
/*
* We should exit on any event that interrupts mwait, because
@ -569,3 +577,47 @@ nmi_handle_intr(u_int type, struct trapframe *frame)
nmi_call_kdb(PCPU_GET(cpuid), type, frame);
#endif
}
/*
 * Computed by hw_ibrs_recalculate(); exported read-only as the
 * sysctl hw.ibrs_active.
 */
int hw_ibrs_active;
/* Administrative switch; non-zero disables IBRS, which is the default. */
int hw_ibrs_disable = 1;
SYSCTL_INT(_hw, OID_AUTO, ibrs_active, CTLFLAG_RD, &hw_ibrs_active, 0,
"IBRS active");
/*
 * Re-evaluate the IBRS configuration from hw_ibrs_disable and the
 * CPU feature words.
 *
 * Without IA32_ARCH_CAP_IBRS_ALL, only hw_ibrs_active is updated: it
 * becomes true when the CPU advertises CPUID_STDEXT3_IBPB and the
 * mitigation is not administratively disabled.
 *
 * With IA32_ARCH_CAP_IBRS_ALL, the IBRS bit of MSR_IA32_SPEC_CTRL is
 * set or cleared directly according to hw_ibrs_disable, and
 * hw_ibrs_active is left unchanged.
 * NOTE(review): the MSR is presumably written only on the CPU that
 * executes this function; confirm that callers cover every CPU.
 */
void
hw_ibrs_recalculate(void)
{
	uint64_t spec_ctrl;

	if ((cpu_ia32_arch_caps & IA32_ARCH_CAP_IBRS_ALL) == 0) {
		hw_ibrs_active =
		    (cpu_stdext_feature3 & CPUID_STDEXT3_IBPB) != 0 &&
		    !hw_ibrs_disable;
		return;
	}

	spec_ctrl = rdmsr(MSR_IA32_SPEC_CTRL);
	if (hw_ibrs_disable)
		spec_ctrl &= ~IA32_SPEC_CTRL_IBRS;
	else
		spec_ctrl |= IA32_SPEC_CTRL_IBRS;
	wrmsr(MSR_IA32_SPEC_CTRL, spec_ctrl);
}
static int
hw_ibrs_disable_handler(SYSCTL_HANDLER_ARGS)
{
int error, val;
val = hw_ibrs_disable;
error = sysctl_handle_int(oidp, &val, 0, req);
if (error != 0 || req->newptr == NULL)
return (error);
hw_ibrs_disable = val != 0;
hw_ibrs_recalculate();
return (0);
}
/*
 * Register hw.ibrs_disable as an integer sysctl whose reads and
 * writes go through hw_ibrs_disable_handler().
 * NOTE(review): CTLFLAG_NOFETCH is presumably used because the
 * tunable is fetched explicitly with
 * TUNABLE_INT_FETCH("hw.ibrs_disable", ...) during early boot;
 * confirm.
 */
SYSCTL_PROC(_hw, OID_AUTO, ibrs_disable, CTLTYPE_INT | CTLFLAG_RWTUN |
CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0, hw_ibrs_disable_handler, "I",
"Disable IBRS");