amd64: tidy up kernel memmove
There is no need to use %rax for temporary values, and avoiding that shortens the function: the return value (the destination pointer) now lives in %rax from entry, with %r8 taking over as the scratch register for the overlap check. Also handle the explicit 'check for tail' depessimization for backwards copying. This reduces the diff against userspace.

Approved by: re (kib)
parent bd6c14afa7
commit 17f67f63b9
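
For orientation, below is a minimal C sketch of the logic these routines implement; it is an illustration under my reading of the diff, not the kernel code, and memmove_sketch is an invented name. memmove() must return the destination pointer, which is why the destination can now be kept in %rax (the return register) from entry, with %r8 taking over as the scratch register for the overlap test.

#include <stddef.h>
#include <stdint.h>

/*
 * Illustration only: the direction decision made by memmove_std/memmove_erms.
 * The single unsigned compare below is the C equivalent of "subq %rsi,%r8"
 * followed by "cmpq %rcx,%r8": because of unsigned wrap-around it fires for
 * the asm comment's "overlapping && src < dst?" case, the one that requires
 * copying backwards.
 */
void *
memmove_sketch(void *dst, const void *src, size_t len)
{
	unsigned char *d = dst;
	const unsigned char *s = src;
	size_t i;

	if ((uintptr_t)dst - (uintptr_t)src < len) {
		for (i = len; i > 0; i--)	/* overlapping, src below dst: backwards */
			d[i - 1] = s[i - 1];
	} else {
		for (i = 0; i < len; i++)	/* otherwise a plain forward copy */
			d[i] = s[i];
	}
	return (dst);			/* memmove returns dst, hence dst in %rax */
}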
@@ -108,12 +108,12 @@ END(sse2_pagezero)
  */
 ENTRY(memmove_std)
 	PUSH_FRAME_POINTER
-	movq	%rdi,%r9
+	movq	%rdi,%rax
 	movq	%rdx,%rcx
 
-	movq	%rdi,%rax
-	subq	%rsi,%rax
-	cmpq	%rcx,%rax	/* overlapping && src < dst? */
+	movq	%rdi,%r8
+	subq	%rsi,%r8
+	cmpq	%rcx,%r8	/* overlapping && src < dst? */
 	jb	1f
 
 	shrq	$3,%rcx		/* copy by 64-bit words */
@@ -128,7 +128,6 @@ ENTRY(memmove_std)
 2:
 	rep
 	movsb
-	movq	%r9,%rax
 	POP_FRAME_POINTER
 	ret
 
@@ -140,8 +139,10 @@ ENTRY(memmove_std)
 	decq	%rsi
 	andq	$7,%rcx		/* any fractional bytes? */
 	std
+	jne	3f
 	rep
 	movsb
+3:
 	movq	%rdx,%rcx	/* copy remainder by 32-bit words */
 	shrq	$3,%rcx
 	subq	$7,%rsi
@@ -149,24 +150,22 @@ ENTRY(memmove_std)
 	rep
 	movsq
 	cld
-	movq	%r9,%rax
 	POP_FRAME_POINTER
 	ret
 END(memmove_std)
 
 ENTRY(memmove_erms)
 	PUSH_FRAME_POINTER
-	movq	%rdi,%r9
+	movq	%rdi,%rax
 	movq	%rdx,%rcx
 
-	movq	%rdi,%rax
-	subq	%rsi,%rax
-	cmpq	%rcx,%rax	/* overlapping && src < dst? */
+	movq	%rdi,%r8
+	subq	%rsi,%r8
+	cmpq	%rcx,%r8	/* overlapping && src < dst? */
 	jb	1f
 
 	rep
 	movsb
-	movq	%r9,%rax
 	POP_FRAME_POINTER
 	ret
 
@@ -179,7 +178,6 @@ ENTRY(memmove_erms)
 	rep
 	movsb
 	cld
-	movq	%r9,%rax
 	POP_FRAME_POINTER
 	ret
 END(memmove_erms)
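
On the 'check for tail' depessimization for the backwards path: under my reading of the commit message (and of the existing forward-path pattern around label 2:), the intent is to make the byte-granularity rep movsb conditional on the length actually having a fractional tail, instead of always issuing it with a count that is frequently zero and paying its startup cost. A hedged C sketch of that idea follows; copy_backwards_sketch is an invented name and the loops stand in for the movsb/movsq sequences.

#include <stddef.h>

/*
 * Sketch only, not the kernel routine: backwards copy with the tail check.
 * The byte pass runs only when (len & 7) != 0; the remaining multiple-of-8
 * portion corresponds to what the asm copies with "rep movsq".
 */
void
copy_backwards_sketch(unsigned char *dst, const unsigned char *src, size_t len)
{
	size_t tail = len & 7;		/* fractional bytes at the top end */
	size_t bulk = len - tail;	/* multiple-of-8 remainder */
	size_t i;

	if (tail != 0) {		/* the added tail check */
		for (i = len; i > bulk; i--)
			dst[i - 1] = src[i - 1];
	}
	for (i = bulk; i > 0; i--)	/* the part done by rep movsq in asm */
		dst[i - 1] = src[i - 1];
}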