Use sse2_pagezero() for idle page zeroing and align its store loops.

Summary of the hunks below:
  * amd64 pmap.c: pmap_zero_page_idle() calls sse2_pagezero() in place
    of pagezero().
  * amd64 support.S: jump over the padding and align the movnti loop of
    sse2_pagezero with .p2align 5 (32 bytes); make the END() name match
    ENTRY(sse2_pagezero).
  * amd64 md_var.h: declare sse2_pagezero().
  * i386 support.s: align the loop with .p2align 4 (16 bytes) and issue
    two 4-byte movnti stores per iteration, advancing %ecx by 8.

NOTE(review): this patch was recovered from a copy in which line breaks
and indentation had been lost.  Leading tabs and blank context lines are
reconstructed, and intra-line spacing is kept as found; confirm against
the tree, or apply with whitespace-insensitive matching.

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index f87d3b5c599b..3cb85301e93f 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -5192,7 +5192,7 @@ pmap_zero_page_idle(vm_page_t m)
 {
 	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
 
-	pagezero((void *)va);
+	sse2_pagezero((void *)va);
 }
 
 /*
diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S
index bc07fbda739c..1a4e910ee10e 100644
--- a/sys/amd64/amd64/support.S
+++ b/sys/amd64/amd64/support.S
@@ -78,6 +78,12 @@ ENTRY(sse2_pagezero)
 	movq $-PAGE_SIZE,%rdx
 	subq %rdx,%rdi
 	xorl %eax,%eax
+	jmp 1f
+	/*
+	 * The loop takes 29 bytes. Ensure that it doesn't cross a 32-byte
+	 * cache line.
+	 */
+	.p2align 5,0x90
 1:
 	movnti %rax,(%rdi,%rdx)
 	movnti %rax,8(%rdi,%rdx)
@@ -88,7 +94,7 @@ ENTRY(sse2_pagezero)
 	sfence
 	POP_FRAME_POINTER
 	ret
-END(pagezero)
+END(sse2_pagezero)
 
 ENTRY(bcmp)
 	PUSH_FRAME_POINTER
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
index e4c50eba7642..f161c97d8e41 100644
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -57,6 +57,7 @@ void gsbase_load_fault(void) __asm(__STRING(gsbase_load_fault));
 void fpstate_drop(struct thread *td);
 void pagezero(void *addr);
 void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int ist);
+void sse2_pagezero(void *addr);
 struct savefpu *get_pcb_user_save_td(struct thread *td);
 struct savefpu *get_pcb_user_save_pcb(struct pcb *pcb);
 
diff --git a/sys/i386/i386/support.s b/sys/i386/i386/support.s
index 63912b6aba86..03d08cb52855 100644
--- a/sys/i386/i386/support.s
+++ b/sys/i386/i386/support.s
@@ -69,9 +69,16 @@ ENTRY(sse2_pagezero)
 	movl %ecx,%eax
 	addl $4096,%eax
 	xor %ebx,%ebx
+	jmp 1f
+	/*
+	 * The loop takes 14 bytes. Ensure that it doesn't cross a 16-byte
+	 * cache line.
+	 */
+	.p2align 4,0x90
 1:
 	movnti %ebx,(%ecx)
-	addl $4,%ecx
+	movnti %ebx,4(%ecx)
+	addl $8,%ecx
 	cmpl %ecx,%eax
 	jne 1b
 	sfence