amd64: handle small memmove buffers with overlapping stores

Handling of sizes > 32 in the backward (overlapping) case will be updated later.

Reviewed by:	kib (kernel part)
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D18387
Author:	Mateusz Guzik
Date:	2018-11-30 20:58:08 +00:00
Parent:	1489776d43
Commit:	94243af2da

2 changed files with 103 additions and 80 deletions
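The scheme for sizes that do not fill a whole 32-byte chunk is the usual overlapping-store trick: within a size class, load the head and the tail of the region first, then issue both stores, letting them overlap when the length is not an exact multiple of the access size. Because nothing is written until everything is read, copy direction is irrelevant for these sizes. A minimal C sketch of the pattern for the 8..15 byte class (the 100816 path below); the helper name and the memcpy()-based unaligned accesses are illustrative only, not part of the commit:

	#include <stdint.h>
	#include <string.h>

	/* Copy n bytes, 8 <= n <= 16, with two possibly overlapping 8-byte moves. */
	static void
	copy_8_to_16(char *dst, const char *src, size_t n)
	{
		uint64_t head, tail;

		memcpy(&head, src, 8);			/* movq (%rsi),%rdx */
		memcpy(&tail, src + n - 8, 8);		/* movq -8(%rsi,%rcx),%r8 */
		memcpy(dst, &head, 8);			/* movq %rdx,(%rdi) */
		memcpy(dst + n - 8, &tail, 8);		/* movq %r8,-8(%rdi,%rcx) */
	}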


@@ -42,11 +42,19 @@ __FBSDID("$FreeBSD$");
* rsi - source
* rdx - count
*
* The macro possibly clobbers the above and: rcx, r8.
* It does not clobber rax, r10 nor r11.
* The macro possibly clobbers the above and: rcx, r8, r9, r10
* It does not clobber rax nor r11.
*/
.macro MEMMOVE erms overlap begin end
\begin
/*
* For sizes 0..32 all data is read before it is written, so there
* is no correctness issue with direction of copying.
*/
cmpq $32,%rcx
jbe 101632f
.if \overlap == 1
movq %rdi,%r8
subq %rsi,%r8
@@ -54,13 +62,10 @@ __FBSDID("$FreeBSD$");
jb 2f
.endif
cmpq $32,%rcx
jb 1016f
cmpq $256,%rcx
ja 1256f
1032:
103200:
movq (%rsi),%rdx
movq %rdx,(%rdi)
movq 8(%rsi),%rdx
@@ -73,56 +78,62 @@ __FBSDID("$FreeBSD$");
leaq 32(%rdi),%rdi
subq $32,%rcx
cmpq $32,%rcx
jae 1032b
jae 103200b
cmpb $0,%cl
jne 1016f
jne 101632f
\end
ret
ALIGN_TEXT
1016:
101632:
cmpb $16,%cl
jl 1008f
jl 100816f
movq (%rsi),%rdx
movq 8(%rsi),%r8
movq -16(%rsi,%rcx),%r9
movq -8(%rsi,%rcx),%r10
movq %rdx,(%rdi)
movq 8(%rsi),%rdx
movq %rdx,8(%rdi)
subb $16,%cl
jz 1000f
leaq 16(%rsi),%rsi
leaq 16(%rdi),%rdi
1008:
movq %r8,8(%rdi)
movq %r9,-16(%rdi,%rcx)
movq %r10,-8(%rdi,%rcx)
\end
ret
ALIGN_TEXT
100816:
cmpb $8,%cl
jl 1004f
jl 100408f
movq (%rsi),%rdx
movq -8(%rsi,%rcx),%r8
movq %rdx,(%rdi)
subb $8,%cl
jz 1000f
leaq 8(%rsi),%rsi
leaq 8(%rdi),%rdi
1004:
movq %r8,-8(%rdi,%rcx)
\end
ret
ALIGN_TEXT
100408:
cmpb $4,%cl
jl 1002f
jl 100204f
movl (%rsi),%edx
movl -4(%rsi,%rcx),%r8d
movl %edx,(%rdi)
subb $4,%cl
jz 1000f
leaq 4(%rsi),%rsi
leaq 4(%rdi),%rdi
1002:
movl %r8d,-4(%rdi,%rcx)
\end
ret
ALIGN_TEXT
100204:
cmpb $2,%cl
jl 1001f
movw (%rsi),%dx
jl 100001f
movzwl (%rsi),%edx
movzwl -2(%rsi,%rcx),%r8d
movw %dx,(%rdi)
subb $2,%cl
jz 1000f
leaq 2(%rsi),%rsi
leaq 2(%rdi),%rdi
1001:
movw %r8w,-2(%rdi,%rcx)
\end
ret
ALIGN_TEXT
100001:
cmpb $1,%cl
jl 1000f
jl 100000f
movb (%rsi),%dl
movb %dl,(%rdi)
1000:
100000:
\end
ret
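The renamed labels spell out a branch ladder over size classes, each of which finishes the copy with one round of possibly overlapping stores instead of walking the remainder chunk by chunk the way the old 1016/1008/1004 chain did. A rough C equivalent of the whole 0..32 path, with an illustrative helper name and memcpy() standing in for the unaligned loads and stores:

	#include <stdint.h>
	#include <string.h>

	/*
	 * Rough C equivalent of the 0..32 path (labels 101632 through 100000).
	 * Every branch loads the head and the tail of the region before storing,
	 * so overlapping src/dst is safe in either direction.
	 */
	static void
	small_copy(char *d, const char *s, size_t n)
	{
		uint64_t q0, q1, q2, q3;
		uint32_t l0, l1;
		uint16_t w0, w1;

		if (n >= 16) {				/* 101632: 16..32 bytes */
			memcpy(&q0, s, 8);
			memcpy(&q1, s + 8, 8);
			memcpy(&q2, s + n - 16, 8);
			memcpy(&q3, s + n - 8, 8);
			memcpy(d, &q0, 8);
			memcpy(d + 8, &q1, 8);
			memcpy(d + n - 16, &q2, 8);
			memcpy(d + n - 8, &q3, 8);
		} else if (n >= 8) {			/* 100816: 8..15 bytes */
			memcpy(&q0, s, 8);
			memcpy(&q1, s + n - 8, 8);
			memcpy(d, &q0, 8);
			memcpy(d + n - 8, &q1, 8);
		} else if (n >= 4) {			/* 100408: 4..7 bytes */
			memcpy(&l0, s, 4);
			memcpy(&l1, s + n - 4, 4);
			memcpy(d, &l0, 4);
			memcpy(d + n - 4, &l1, 4);
		} else if (n >= 2) {			/* 100204: 2..3 bytes */
			memcpy(&w0, s, 2);
			memcpy(&w1, s + n - 2, 2);
			memcpy(d, &w0, 2);
			memcpy(d + n - 2, &w1, 2);
		} else if (n == 1) {			/* 100001: 1 byte */
			*d = *s;
		}					/* 100000: 0 bytes, nothing */
	}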
@@ -136,8 +147,8 @@ __FBSDID("$FreeBSD$");
rep
movsq
movq %rdx,%rcx
andb $7,%cl /* any bytes left? */
jne 1004b
andl $7,%ecx /* any bytes left? */
jne 100408b
.endif
\end
ret
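In the rep movsq large-copy path the leftover 0..7 bytes are now finished by jumping back into the ladder at 100408, and the remainder is computed with andl on the full %ecx rather than andb on %cl. Roughly, in terms of the illustrative small_copy() helper sketched above (forward copies only; the backward-overlap case takes a separate path):

	#include <stddef.h>
	#include <string.h>

	/*
	 * Sketch of the qword bulk copy plus 0..7 byte tail, assuming the
	 * illustrative small_copy() helper from above is in the same file.
	 */
	static void
	bulk_then_tail(char *d, const char *s, size_t n)
	{
		size_t i, qwords = n / 8;

		for (i = 0; i < qwords; i++)		/* rep; movsq over whole qwords */
			memcpy(d + i * 8, s + i * 8, 8);
		if (n & 7)				/* andl $7,%ecx; jne 100408b */
			small_copy(d + qwords * 8, s + qwords * 8, n & 7);
	}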
@@ -247,6 +258,7 @@ __FBSDID("$FreeBSD$");
.endif
.endm
.macro MEMMOVE_BEGIN
movq %rdi,%rax
movq %rdx,%rcx


@@ -205,11 +205,19 @@ END(memcmp)
* rsi - source
* rdx - count
*
* The macro possibly clobbers the above and: rcx, r8.
* It does not clobber rax, r10 nor r11.
* The macro possibly clobbers the above and: rcx, r8, r9, r10
* It does not clobber rax nor r11.
*/
.macro MEMMOVE erms overlap begin end
\begin
/*
* For sizes 0..32 all data is read before it is written, so there
* is no correctness issue with direction of copying.
*/
cmpq $32,%rcx
jbe 101632f
.if \overlap == 1
movq %rdi,%r8
subq %rsi,%r8
@@ -217,13 +225,10 @@ END(memcmp)
jb 2f
.endif
cmpq $32,%rcx
jb 1016f
cmpq $256,%rcx
ja 1256f
1032:
103200:
movq (%rsi),%rdx
movq %rdx,(%rdi)
movq 8(%rsi),%rdx
@@ -236,56 +241,62 @@ END(memcmp)
leaq 32(%rdi),%rdi
subq $32,%rcx
cmpq $32,%rcx
jae 1032b
jae 103200b
cmpb $0,%cl
jne 1016f
jne 101632f
\end
ret
ALIGN_TEXT
1016:
101632:
cmpb $16,%cl
jl 1008f
jl 100816f
movq (%rsi),%rdx
movq 8(%rsi),%r8
movq -16(%rsi,%rcx),%r9
movq -8(%rsi,%rcx),%r10
movq %rdx,(%rdi)
movq 8(%rsi),%rdx
movq %rdx,8(%rdi)
subb $16,%cl
jz 1000f
leaq 16(%rsi),%rsi
leaq 16(%rdi),%rdi
1008:
movq %r8,8(%rdi)
movq %r9,-16(%rdi,%rcx)
movq %r10,-8(%rdi,%rcx)
\end
ret
ALIGN_TEXT
100816:
cmpb $8,%cl
jl 1004f
jl 100408f
movq (%rsi),%rdx
movq -8(%rsi,%rcx),%r8
movq %rdx,(%rdi)
subb $8,%cl
jz 1000f
leaq 8(%rsi),%rsi
leaq 8(%rdi),%rdi
1004:
movq %r8,-8(%rdi,%rcx)
\end
ret
ALIGN_TEXT
100408:
cmpb $4,%cl
jl 1002f
jl 100204f
movl (%rsi),%edx
movl -4(%rsi,%rcx),%r8d
movl %edx,(%rdi)
subb $4,%cl
jz 1000f
leaq 4(%rsi),%rsi
leaq 4(%rdi),%rdi
1002:
movl %r8d,-4(%rdi,%rcx)
\end
ret
ALIGN_TEXT
100204:
cmpb $2,%cl
jl 1001f
movw (%rsi),%dx
jl 100001f
movzwl (%rsi),%edx
movzwl -2(%rsi,%rcx),%r8d
movw %dx,(%rdi)
subb $2,%cl
jz 1000f
leaq 2(%rsi),%rsi
leaq 2(%rdi),%rdi
1001:
movw %r8w,-2(%rdi,%rcx)
\end
ret
ALIGN_TEXT
100001:
cmpb $1,%cl
jl 1000f
jl 100000f
movb (%rsi),%dl
movb %dl,(%rdi)
1000:
100000:
\end
ret
@@ -299,8 +310,8 @@ END(memcmp)
rep
movsq
movq %rdx,%rcx
andb $7,%cl /* any bytes left? */
jne 1004b
andl $7,%ecx /* any bytes left? */
jne 100408b
.endif
\end
ret
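To convince oneself that the read-everything-then-write-everything approach really is direction-agnostic, the C sketches above can be checked against libc memmove() for every size up to 32 and every overlapping offset. A throwaway harness, assuming the illustrative small_copy() from above is compiled into the same file:

	#include <assert.h>
	#include <stdio.h>
	#include <string.h>

	int
	main(void)
	{
		char ref[160], buf[160];
		size_t i, n;
		int off;

		for (n = 0; n <= 32; n++) {
			for (off = -32; off <= 32; off++) {
				/* Fresh, identical contents for both buffers. */
				for (i = 0; i < sizeof(buf); i++)
					ref[i] = buf[i] = (char)i;
				memmove(ref + 64 + off, ref + 64, n);
				small_copy(buf + 64 + off, buf + 64, n);
				assert(memcmp(ref, buf, sizeof(buf)) == 0);
			}
		}
		printf("ok\n");
		return (0);
	}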