amd64: handle small memmove buffers with overlapping stores
Handling of sizes > 32 when copying backwards will be updated later.

Reviewed by: kib (kernel part)
Sponsored by: The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D18387
This commit is contained in:
parent
b1a82c58d4
commit
f5d5aead70
@ -42,11 +42,19 @@ __FBSDID("$FreeBSD$");
|
||||
* rsi - source
|
||||
* rdx - count
|
||||
*
|
||||
* The macro possibly clobbers the above and: rcx, r8.
|
||||
* It does not clobber rax, r10 nor r11.
|
||||
* The macro possibly clobbers the above and: rcx, r8, r9, r10
|
||||
* It does not clobber rax nor r11.
|
||||
*/
|
||||
.macro MEMMOVE erms overlap begin end
|
||||
\begin
|
||||
|
||||
/*
|
||||
* For sizes 0..32 all data is read before it is written, so there
|
||||
* is no correctness issue with direction of copying.
|
||||
*/
|
||||
cmpq $32,%rcx
|
||||
jbe 101632f
|
||||
|
||||
.if \overlap == 1
|
||||
movq %rdi,%r8
|
||||
subq %rsi,%r8
|
||||
@ -54,13 +62,10 @@ __FBSDID("$FreeBSD$");
|
||||
jb 2f
|
||||
.endif
|
||||
|
||||
cmpq $32,%rcx
|
||||
jb 1016f
|
||||
|
||||
cmpq $256,%rcx
|
||||
ja 1256f
|
||||
|
||||
1032:
|
||||
103200:
|
||||
movq (%rsi),%rdx
|
||||
movq %rdx,(%rdi)
|
||||
movq 8(%rsi),%rdx
|
||||
@ -73,56 +78,62 @@ __FBSDID("$FreeBSD$");
|
||||
leaq 32(%rdi),%rdi
|
||||
subq $32,%rcx
|
||||
cmpq $32,%rcx
|
||||
jae 1032b
|
||||
jae 103200b
|
||||
cmpb $0,%cl
|
||||
jne 1016f
|
||||
jne 101632f
|
||||
\end
|
||||
ret
|
||||
ALIGN_TEXT
|
||||
1016:
|
||||
101632:
|
||||
cmpb $16,%cl
|
||||
jl 1008f
|
||||
jl 100816f
|
||||
movq (%rsi),%rdx
|
||||
movq 8(%rsi),%r8
|
||||
movq -16(%rsi,%rcx),%r9
|
||||
movq -8(%rsi,%rcx),%r10
|
||||
movq %rdx,(%rdi)
|
||||
movq 8(%rsi),%rdx
|
||||
movq %rdx,8(%rdi)
|
||||
subb $16,%cl
|
||||
jz 1000f
|
||||
leaq 16(%rsi),%rsi
|
||||
leaq 16(%rdi),%rdi
|
||||
1008:
|
||||
movq %r8,8(%rdi)
|
||||
movq %r9,-16(%rdi,%rcx)
|
||||
movq %r10,-8(%rdi,%rcx)
|
||||
\end
|
||||
ret
|
||||
ALIGN_TEXT
|
||||
100816:
|
||||
cmpb $8,%cl
|
||||
jl 1004f
|
||||
jl 100408f
|
||||
movq (%rsi),%rdx
|
||||
movq -8(%rsi,%rcx),%r8
|
||||
movq %rdx,(%rdi)
|
||||
subb $8,%cl
|
||||
jz 1000f
|
||||
leaq 8(%rsi),%rsi
|
||||
leaq 8(%rdi),%rdi
|
||||
1004:
|
||||
movq %r8,-8(%rdi,%rcx,)
|
||||
\end
|
||||
ret
|
||||
ALIGN_TEXT
|
||||
100408:
|
||||
cmpb $4,%cl
|
||||
jl 1002f
|
||||
jl 100204f
|
||||
movl (%rsi),%edx
|
||||
movl -4(%rsi,%rcx),%r8d
|
||||
movl %edx,(%rdi)
|
||||
subb $4,%cl
|
||||
jz 1000f
|
||||
leaq 4(%rsi),%rsi
|
||||
leaq 4(%rdi),%rdi
|
||||
1002:
|
||||
movl %r8d,-4(%rdi,%rcx)
|
||||
\end
|
||||
ret
|
||||
ALIGN_TEXT
|
||||
100204:
|
||||
cmpb $2,%cl
|
||||
jl 1001f
|
||||
movw (%rsi),%dx
|
||||
jl 100001f
|
||||
movzwl (%rsi),%edx
|
||||
movzwl -2(%rsi,%rcx),%r8d
|
||||
movw %dx,(%rdi)
|
||||
subb $2,%cl
|
||||
jz 1000f
|
||||
leaq 2(%rsi),%rsi
|
||||
leaq 2(%rdi),%rdi
|
||||
1001:
|
||||
movw %r8w,-2(%rdi,%rcx)
|
||||
\end
|
||||
ret
|
||||
ALIGN_TEXT
|
||||
100001:
|
||||
cmpb $1,%cl
|
||||
jl 1000f
|
||||
jl 100000f
|
||||
movb (%rsi),%dl
|
||||
movb %dl,(%rdi)
|
||||
1000:
|
||||
100000:
|
||||
\end
|
||||
ret
|
||||
|
||||
@ -136,8 +147,8 @@ __FBSDID("$FreeBSD$");
|
||||
rep
|
||||
movsq
|
||||
movq %rdx,%rcx
|
||||
andb $7,%cl /* any bytes left? */
|
||||
jne 1004b
|
||||
andl $7,%ecx /* any bytes left? */
|
||||
jne 100408b
|
||||
.endif
|
||||
\end
|
||||
ret
|
||||
@ -247,6 +258,7 @@ __FBSDID("$FreeBSD$");
|
||||
.endif
|
||||
.endm
|
||||
|
||||
|
||||
.macro MEMMOVE_BEGIN
|
||||
movq %rdi,%rax
|
||||
movq %rdx,%rcx
|
||||
|
@ -205,11 +205,19 @@ END(memcmp)
|
||||
* rsi - source
|
||||
* rdx - count
|
||||
*
|
||||
* The macro possibly clobbers the above and: rcx, r8.
|
||||
* It does not clobber rax, r10 nor r11.
|
||||
* The macro possibly clobbers the above and: rcx, r8, r9, r10
|
||||
* It does not clobber rax nor r11.
|
||||
*/
|
||||
.macro MEMMOVE erms overlap begin end
|
||||
\begin
|
||||
|
||||
/*
|
||||
* For sizes 0..32 all data is read before it is written, so there
|
||||
* is no correctness issue with direction of copying.
|
||||
*/
|
||||
cmpq $32,%rcx
|
||||
jbe 101632f
|
||||
|
||||
.if \overlap == 1
|
||||
movq %rdi,%r8
|
||||
subq %rsi,%r8
|
||||
@ -217,13 +225,10 @@ END(memcmp)
|
||||
jb 2f
|
||||
.endif
|
||||
|
||||
cmpq $32,%rcx
|
||||
jb 1016f
|
||||
|
||||
cmpq $256,%rcx
|
||||
ja 1256f
|
||||
|
||||
1032:
|
||||
103200:
|
||||
movq (%rsi),%rdx
|
||||
movq %rdx,(%rdi)
|
||||
movq 8(%rsi),%rdx
|
||||
@ -236,56 +241,62 @@ END(memcmp)
|
||||
leaq 32(%rdi),%rdi
|
||||
subq $32,%rcx
|
||||
cmpq $32,%rcx
|
||||
jae 1032b
|
||||
jae 103200b
|
||||
cmpb $0,%cl
|
||||
jne 1016f
|
||||
jne 101632f
|
||||
\end
|
||||
ret
|
||||
ALIGN_TEXT
|
||||
1016:
|
||||
101632:
|
||||
cmpb $16,%cl
|
||||
jl 1008f
|
||||
jl 100816f
|
||||
movq (%rsi),%rdx
|
||||
movq 8(%rsi),%r8
|
||||
movq -16(%rsi,%rcx),%r9
|
||||
movq -8(%rsi,%rcx),%r10
|
||||
movq %rdx,(%rdi)
|
||||
movq 8(%rsi),%rdx
|
||||
movq %rdx,8(%rdi)
|
||||
subb $16,%cl
|
||||
jz 1000f
|
||||
leaq 16(%rsi),%rsi
|
||||
leaq 16(%rdi),%rdi
|
||||
1008:
|
||||
movq %r8,8(%rdi)
|
||||
movq %r9,-16(%rdi,%rcx)
|
||||
movq %r10,-8(%rdi,%rcx)
|
||||
\end
|
||||
ret
|
||||
ALIGN_TEXT
|
||||
100816:
|
||||
cmpb $8,%cl
|
||||
jl 1004f
|
||||
jl 100408f
|
||||
movq (%rsi),%rdx
|
||||
movq -8(%rsi,%rcx),%r8
|
||||
movq %rdx,(%rdi)
|
||||
subb $8,%cl
|
||||
jz 1000f
|
||||
leaq 8(%rsi),%rsi
|
||||
leaq 8(%rdi),%rdi
|
||||
1004:
|
||||
movq %r8,-8(%rdi,%rcx,)
|
||||
\end
|
||||
ret
|
||||
ALIGN_TEXT
|
||||
100408:
|
||||
cmpb $4,%cl
|
||||
jl 1002f
|
||||
jl 100204f
|
||||
movl (%rsi),%edx
|
||||
movl -4(%rsi,%rcx),%r8d
|
||||
movl %edx,(%rdi)
|
||||
subb $4,%cl
|
||||
jz 1000f
|
||||
leaq 4(%rsi),%rsi
|
||||
leaq 4(%rdi),%rdi
|
||||
1002:
|
||||
movl %r8d,-4(%rdi,%rcx)
|
||||
\end
|
||||
ret
|
||||
ALIGN_TEXT
|
||||
100204:
|
||||
cmpb $2,%cl
|
||||
jl 1001f
|
||||
movw (%rsi),%dx
|
||||
jl 100001f
|
||||
movzwl (%rsi),%edx
|
||||
movzwl -2(%rsi,%rcx),%r8d
|
||||
movw %dx,(%rdi)
|
||||
subb $2,%cl
|
||||
jz 1000f
|
||||
leaq 2(%rsi),%rsi
|
||||
leaq 2(%rdi),%rdi
|
||||
1001:
|
||||
movw %r8w,-2(%rdi,%rcx)
|
||||
\end
|
||||
ret
|
||||
ALIGN_TEXT
|
||||
100001:
|
||||
cmpb $1,%cl
|
||||
jl 1000f
|
||||
jl 100000f
|
||||
movb (%rsi),%dl
|
||||
movb %dl,(%rdi)
|
||||
1000:
|
||||
100000:
|
||||
\end
|
||||
ret
|
||||
|
||||
@ -299,8 +310,8 @@ END(memcmp)
|
||||
rep
|
||||
movsq
|
||||
movq %rdx,%rcx
|
||||
andb $7,%cl /* any bytes left? */
|
||||
jne 1004b
|
||||
andl $7,%ecx /* any bytes left? */
|
||||
jne 100408b
|
||||
.endif
|
||||
\end
|
||||
ret
|
||||
|
Loading…
x
Reference in New Issue
Block a user