amd64: align target memmove buffer to 16 bytes before using rep movs

See the review for sample test results.

Reviewed by:	kib (kernel part)
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D18401
Mateusz Guzik 2018-12-01 14:20:32 +00:00
parent df5ceb3b66
commit ddf6571230
2 changed files with 60 additions and 0 deletions
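
The idea, in rough terms: when the destination of a large copy is not 16-byte aligned, load the first 16 bytes up front, advance both pointers to the destination's next 16-byte boundary, run the bulk rep copy on the now-aligned buffer, and store the saved head bytes afterwards. A minimal C sketch of that idea (a hypothetical helper for illustration only; it ignores the size cutoffs the committed assembly applies):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * Hypothetical illustration only: align the destination to 16 bytes
 * before the bulk copy and patch the (at most 15) head bytes in last.
 */
static void
copy_align16(void *dst, const void *src, size_t len)
{
	unsigned char head[16];
	size_t off, adj;

	off = (uintptr_t)dst & 15;
	if (len < 16 || off == 0) {
		memcpy(dst, src, len);		/* short or already aligned */
		return;
	}
	memcpy(head, src, 16);			/* save the unaligned head */
	adj = 16 - off;				/* bytes needed to reach alignment */
	memcpy((char *)dst + adj, (const char *)src + adj, len - adj);
	memcpy(dst, head, 16);			/* store the head last */
}

Storing the head last mirrors the assembly's ordering, which also keeps an overlapping forward memmove from writing destination head bytes before the corresponding source bytes have been read.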


@@ -139,6 +139,8 @@ __FBSDID("$FreeBSD$");
	ALIGN_TEXT
1256:
	testb	$15,%dil
	jnz	100f
.if \erms == 1
	rep
	movsb
@@ -152,6 +154,34 @@ __FBSDID("$FreeBSD$");
.endif
	\end
	ret
100:
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%rdi,%r10
	movq	%rdi,%rcx
	andq	$15,%rcx
	leaq	-16(%rdx,%rcx),%rdx
	neg	%rcx
	leaq	16(%rdi,%rcx),%rdi
	leaq	16(%rsi,%rcx),%rsi
	movq	%rdx,%rcx
.if \erms == 1
	rep
	movsb
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
.else
	shrq	$3,%rcx			/* copy by 64-bit words */
	rep
	movsq
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
	movq	%rdx,%rcx
	andl	$7,%ecx			/* any bytes left? */
	jne	100408b
.endif
	\end
	ret
.if \overlap == 1
/*
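
The pointer arithmetic in the new 100: block is easier to follow with concrete numbers (a worked example, not part of the commit). With %rdi = dst, %rsi = src, %rdx = len, and say dst = 0x1003, len = 1000:

	off = dst & 15;		/* 3;      andq $15,%rcx                      */
	len = len + off - 16;	/* 987;    leaq -16(%rdx,%rcx),%rdx           */
	dst = dst + (16 - off);	/* 0x1010; neg %rcx; leaq 16(%rdi,%rcx),%rdi  */
	src = src + (16 - off);	/* src+13; leaq 16(%rsi,%rcx),%rsi            */

The first 16 source bytes were saved in %r8/%r9 before the adjustment; after the rep copy of the remaining 987 bytes they are stored back at the original destination (%r10), rewriting the 3 bytes the aligned copy already covered with the same values and filling in the 13 head bytes.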


@@ -302,6 +302,8 @@ END(memcmp)
	ALIGN_TEXT
1256:
	testb	$15,%dil
	jnz	100f
.if \erms == 1
	rep
	movsb
@@ -315,6 +317,34 @@ END(memcmp)
.endif
	\end
	ret
100:
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%rdi,%r10
	movq	%rdi,%rcx
	andq	$15,%rcx
	leaq	-16(%rdx,%rcx),%rdx
	neg	%rcx
	leaq	16(%rdi,%rcx),%rdi
	leaq	16(%rsi,%rcx),%rsi
	movq	%rdx,%rcx
.if \erms == 1
	rep
	movsb
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
.else
	shrq	$3,%rcx			/* copy by 64-bit words */
	rep
	movsq
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
	movq	%rdx,%rcx
	andl	$7,%ecx			/* any bytes left? */
	jne	100408b
.endif
	\end
	ret
.if \overlap == 1
/*
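
The second file gains the identical block. In both, the \erms macro parameter selects the bulk-copy flavor: on CPUs advertising Enhanced REP MOVSB/STOSB (ERMS) the aligned region is copied with a plain rep movsb, otherwise with rep movsq plus a byte tail (the jne 100408b branch back to the pre-existing tail handling). As a point of reference, a sketch of detecting that CPUID feature from userland with GCC/Clang's <cpuid.h> (leaf 7, subleaf 0, EBX bit 9); this helper is illustrative only and is not how the committed code selects the variant:

#include <cpuid.h>
#include <stdbool.h>

/* Hypothetical helper: does this CPU advertise ERMS? */
static bool
cpu_has_erms(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx) == 0)
		return (false);
	return ((ebx & (1u << 9)) != 0);	/* CPUID.(EAX=7,ECX=0):EBX bit 9 = ERMS */
}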