amd64: tidy up kernel memmove

There is no need to use %rax for temporary values, and avoiding it
shortens the function.
Handle the explicit 'check for tail' depessimization for backwards copying.

This reduces the diff against userspace.

Approved by:	re (kib)
This commit is contained in:
Mateusz Guzik 2018-09-16 19:28:27 +00:00
parent bd6c14afa7
commit 17f67f63b9

View File

@ -108,12 +108,12 @@ END(sse2_pagezero)
*/
ENTRY(memmove_std)
PUSH_FRAME_POINTER
movq %rdi,%r9
movq %rdi,%rax
movq %rdx,%rcx
movq %rdi,%rax
subq %rsi,%rax
cmpq %rcx,%rax /* overlapping && src < dst? */
movq %rdi,%r8
subq %rsi,%r8
cmpq %rcx,%r8 /* overlapping && src < dst? */
jb 1f
shrq $3,%rcx /* copy by 64-bit words */
@ -128,7 +128,6 @@ ENTRY(memmove_std)
2:
rep
movsb
movq %r9,%rax
POP_FRAME_POINTER
ret
@ -140,8 +139,10 @@ ENTRY(memmove_std)
decq %rsi
andq $7,%rcx /* any fractional bytes? */
std
jne 3f
rep
movsb
3:
movq %rdx,%rcx /* copy remainder by 32-bit words */
shrq $3,%rcx
subq $7,%rsi
@ -149,24 +150,22 @@ ENTRY(memmove_std)
rep
movsq
cld
movq %r9,%rax
POP_FRAME_POINTER
ret
END(memmove_std)
ENTRY(memmove_erms)
PUSH_FRAME_POINTER
movq %rdi,%r9
movq %rdi,%rax
movq %rdx,%rcx
movq %rdi,%rax
subq %rsi,%rax
cmpq %rcx,%rax /* overlapping && src < dst? */
movq %rdi,%r8
subq %rsi,%r8
cmpq %rcx,%r8 /* overlapping && src < dst? */
jb 1f
rep
movsb
movq %r9,%rax
POP_FRAME_POINTER
ret
@ -179,7 +178,6 @@ ENTRY(memmove_erms)
rep
movsb
cld
movq %r9,%rax
POP_FRAME_POINTER
ret
END(memmove_erms)