amd64: tidy up kernel memmove, take 2
There is no need to use %rax for temporary values, and avoiding doing so shortens the function. Handle the explicit 'check for tail' depessimization for backwards copying. This reduces the diff against userspace. Tested with the glibc test suite. Approved by: re (kib)
This commit is contained in:
parent
23ec0d58bf
commit
d6943c5804
@ -108,40 +108,40 @@ END(sse2_pagezero)
|
||||
*/
|
||||
ENTRY(memmove_std)
|
||||
PUSH_FRAME_POINTER
|
||||
movq %rdi,%r9
|
||||
movq %rdi,%rax
|
||||
movq %rdx,%rcx
|
||||
|
||||
movq %rdi,%rax
|
||||
subq %rsi,%rax
|
||||
cmpq %rcx,%rax /* overlapping && src < dst? */
|
||||
jb 1f
|
||||
movq %rdi,%r8
|
||||
subq %rsi,%r8
|
||||
cmpq %rcx,%r8 /* overlapping && src < dst? */
|
||||
jb 2f
|
||||
|
||||
shrq $3,%rcx /* copy by 64-bit words */
|
||||
rep
|
||||
movsq
|
||||
movq %rdx,%rcx
|
||||
andq $7,%rcx /* any bytes left? */
|
||||
jne 2f
|
||||
movq %r9,%rax
|
||||
jne 1f
|
||||
POP_FRAME_POINTER
|
||||
ret
|
||||
2:
|
||||
1:
|
||||
rep
|
||||
movsb
|
||||
movq %r9,%rax
|
||||
POP_FRAME_POINTER
|
||||
ret
|
||||
|
||||
/* ALIGN_TEXT */
|
||||
1:
|
||||
2:
|
||||
addq %rcx,%rdi /* copy backwards */
|
||||
addq %rcx,%rsi
|
||||
decq %rdi
|
||||
decq %rsi
|
||||
andq $7,%rcx /* any fractional bytes? */
|
||||
std
|
||||
andq $7,%rcx /* any fractional bytes? */
|
||||
je 3f
|
||||
rep
|
||||
movsb
|
||||
3:
|
||||
movq %rdx,%rcx /* copy remainder by 64-bit words */
|
||||
shrq $3,%rcx
|
||||
subq $7,%rsi
|
||||
@ -149,24 +149,22 @@ ENTRY(memmove_std)
|
||||
rep
|
||||
movsq
|
||||
cld
|
||||
movq %r9,%rax
|
||||
POP_FRAME_POINTER
|
||||
ret
|
||||
END(memmove_std)
|
||||
|
||||
ENTRY(memmove_erms)
|
||||
PUSH_FRAME_POINTER
|
||||
movq %rdi,%r9
|
||||
movq %rdi,%rax
|
||||
movq %rdx,%rcx
|
||||
|
||||
movq %rdi,%rax
|
||||
subq %rsi,%rax
|
||||
cmpq %rcx,%rax /* overlapping && src < dst? */
|
||||
movq %rdi,%r8
|
||||
subq %rsi,%r8
|
||||
cmpq %rcx,%r8 /* overlapping && src < dst? */
|
||||
jb 1f
|
||||
|
||||
rep
|
||||
movsb
|
||||
movq %r9,%rax
|
||||
POP_FRAME_POINTER
|
||||
ret
|
||||
|
||||
@ -179,7 +177,6 @@ ENTRY(memmove_erms)
|
||||
rep
|
||||
movsb
|
||||
cld
|
||||
movq %r9,%rax
|
||||
POP_FRAME_POINTER
|
||||
ret
|
||||
END(memmove_erms)
|
||||
|
Loading…
x
Reference in New Issue
Block a user