amd64: sync up libc memset with the kernel version
- tidy up memset to have rax set earlier for small sizes
- finish the tail in memset with an overlapping store
- align memset buffers to 16 bytes before using rep stos

Sponsored by: The FreeBSD Foundation
This commit is contained in:
parent
6fff634455
commit
ad2ff705a4
@ -31,12 +31,14 @@
|
||||
#include <machine/asm.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
|
||||
|
||||
.macro MEMSET erms
|
||||
movq %rdi,%r9
|
||||
movq %rdi,%rax
|
||||
movq %rdx,%rcx
|
||||
movzbq %sil,%r8
|
||||
movabs $0x0101010101010101,%rax
|
||||
imulq %r8,%rax
|
||||
movabs $0x0101010101010101,%r10
|
||||
imulq %r8,%r10
|
||||
|
||||
cmpq $32,%rcx
|
||||
jb 1016f
|
||||
@ -45,10 +47,10 @@ __FBSDID("$FreeBSD$");
|
||||
ja 1256f
|
||||
|
||||
1032:
|
||||
movq %rax,(%rdi)
|
||||
movq %rax,8(%rdi)
|
||||
movq %rax,16(%rdi)
|
||||
movq %rax,24(%rdi)
|
||||
movq %r10,(%rdi)
|
||||
movq %r10,8(%rdi)
|
||||
movq %r10,16(%rdi)
|
||||
movq %r10,24(%rdi)
|
||||
leaq 32(%rdi),%rdi
|
||||
subq $32,%rcx
|
||||
cmpq $32,%rcx
|
||||
@ -58,54 +60,72 @@ __FBSDID("$FreeBSD$");
|
||||
1016:
|
||||
cmpb $16,%cl
|
||||
jl 1008f
|
||||
movq %rax,(%rdi)
|
||||
movq %rax,8(%rdi)
|
||||
movq %r10,(%rdi)
|
||||
movq %r10,8(%rdi)
|
||||
subb $16,%cl
|
||||
jz 1000f
|
||||
leaq 16(%rdi),%rdi
|
||||
1008:
|
||||
cmpb $8,%cl
|
||||
jl 1004f
|
||||
movq %rax,(%rdi)
|
||||
movq %r10,(%rdi)
|
||||
subb $8,%cl
|
||||
jz 1000f
|
||||
leaq 8(%rdi),%rdi
|
||||
1004:
|
||||
cmpb $4,%cl
|
||||
jl 1002f
|
||||
movl %eax,(%rdi)
|
||||
movl %r10d,(%rdi)
|
||||
subb $4,%cl
|
||||
jz 1000f
|
||||
leaq 4(%rdi),%rdi
|
||||
1002:
|
||||
cmpb $2,%cl
|
||||
jl 1001f
|
||||
movw %ax,(%rdi)
|
||||
movw %r10w,(%rdi)
|
||||
subb $2,%cl
|
||||
jz 1000f
|
||||
leaq 2(%rdi),%rdi
|
||||
1001:
|
||||
cmpb $1,%cl
|
||||
jl 1000f
|
||||
movb %al,(%rdi)
|
||||
movb %r10b,(%rdi)
|
||||
1000:
|
||||
movq %r9,%rax
|
||||
ret
|
||||
|
||||
ALIGN_TEXT
|
||||
1256:
|
||||
movq %rdi,%r9
|
||||
movq %r10,%rax
|
||||
testl $15,%edi
|
||||
jnz 3f
|
||||
1:
|
||||
.if \erms == 1
|
||||
rep
|
||||
stosb
|
||||
movq %r9,%rax
|
||||
.else
|
||||
movq %rcx,%rdx
|
||||
shrq $3,%rcx
|
||||
rep
|
||||
stosq
|
||||
movq %rdx,%rcx
|
||||
andb $7,%cl
|
||||
jne 1004b
|
||||
.endif
|
||||
movq %r9,%rax
|
||||
andl $7,%edx
|
||||
jnz 2f
|
||||
ret
|
||||
2:
|
||||
movq %r10,-8(%rdi,%rdx)
|
||||
.endif
|
||||
ret
|
||||
ALIGN_TEXT
|
||||
3:
|
||||
movq %r10,(%rdi)
|
||||
movq %r10,8(%rdi)
|
||||
movq %rdi,%r8
|
||||
andq $15,%r8
|
||||
leaq -16(%rcx,%r8),%rcx
|
||||
neg %r8
|
||||
leaq 16(%rdi,%r8),%rdi
|
||||
jmp 1b
|
||||
.endm
|
||||
|
||||
ENTRY(memset)
|
||||
|
Loading…
Reference in New Issue
Block a user