amd64: move memcmp checks upfront
This is a tradeoff which saves jumps for smaller sizes while making the 8-16 range slower (roughly in line with the other cases). Tested with the glibc test suite. For example, size 3 (the most common with the vfs namecache), in ops/s: before: 407086026, after: 461391995. The regressed range of 8-16 (with size 8 as the example), in ops/s: before: 540850489, after: 461671032.
This commit is contained in:
parent
0db6aef407
commit
f1be262ec1
@ -45,9 +45,25 @@ ENTRY(memcmp)
|
|||||||
cmpq $16,%rdx
|
cmpq $16,%rdx
|
||||||
ja 101632f
|
ja 101632f
|
||||||
|
|
||||||
100816:
|
|
||||||
cmpb $8,%dl
|
cmpb $8,%dl
|
||||||
jl 100408f
|
jg 100816f
|
||||||
|
|
||||||
|
cmpb $4,%dl
|
||||||
|
jg 100408f
|
||||||
|
|
||||||
|
cmpb $2,%dl
|
||||||
|
jge 100204f
|
||||||
|
|
||||||
|
cmpb $1,%dl
|
||||||
|
jl 100000f
|
||||||
|
movzbl (%rdi),%eax
|
||||||
|
movzbl (%rsi),%r8d
|
||||||
|
subl %r8d,%eax
|
||||||
|
100000:
|
||||||
|
ret
|
||||||
|
|
||||||
|
ALIGN_TEXT
|
||||||
|
100816:
|
||||||
movq (%rdi),%r8
|
movq (%rdi),%r8
|
||||||
movq (%rsi),%r9
|
movq (%rsi),%r9
|
||||||
cmpq %r8,%r9
|
cmpq %r8,%r9
|
||||||
@ -57,9 +73,8 @@ ENTRY(memcmp)
|
|||||||
cmpq %r8,%r9
|
cmpq %r8,%r9
|
||||||
jne 10081608f
|
jne 10081608f
|
||||||
ret
|
ret
|
||||||
|
ALIGN_TEXT
|
||||||
100408:
|
100408:
|
||||||
cmpb $4,%dl
|
|
||||||
jl 100204f
|
|
||||||
movl (%rdi),%r8d
|
movl (%rdi),%r8d
|
||||||
movl (%rsi),%r9d
|
movl (%rsi),%r9d
|
||||||
cmpl %r8d,%r9d
|
cmpl %r8d,%r9d
|
||||||
@ -69,9 +84,8 @@ ENTRY(memcmp)
|
|||||||
cmpl %r8d,%r9d
|
cmpl %r8d,%r9d
|
||||||
jne 10040804f
|
jne 10040804f
|
||||||
ret
|
ret
|
||||||
|
ALIGN_TEXT
|
||||||
100204:
|
100204:
|
||||||
cmpb $2,%dl
|
|
||||||
jl 100001f
|
|
||||||
movzwl (%rdi),%r8d
|
movzwl (%rdi),%r8d
|
||||||
movzwl (%rsi),%r9d
|
movzwl (%rsi),%r9d
|
||||||
cmpl %r8d,%r9d
|
cmpl %r8d,%r9d
|
||||||
@ -81,15 +95,7 @@ ENTRY(memcmp)
|
|||||||
cmpl %r8d,%r9d
|
cmpl %r8d,%r9d
|
||||||
jne 1f
|
jne 1f
|
||||||
ret
|
ret
|
||||||
100001:
|
ALIGN_TEXT
|
||||||
cmpb $1,%dl
|
|
||||||
jl 100000f
|
|
||||||
movzbl (%rdi),%eax
|
|
||||||
movzbl (%rsi),%r8d
|
|
||||||
subl %r8d,%eax
|
|
||||||
100000:
|
|
||||||
ret
|
|
||||||
ALIGN_TEXT
|
|
||||||
101632:
|
101632:
|
||||||
cmpq $32,%rdx
|
cmpq $32,%rdx
|
||||||
ja 103200f
|
ja 103200f
|
||||||
@ -110,7 +116,7 @@ ALIGN_TEXT
|
|||||||
cmpq %r8,%r9
|
cmpq %r8,%r9
|
||||||
jne 10163224f
|
jne 10163224f
|
||||||
ret
|
ret
|
||||||
ALIGN_TEXT
|
ALIGN_TEXT
|
||||||
103200:
|
103200:
|
||||||
movq (%rdi),%r8
|
movq (%rdi),%r8
|
||||||
movq 8(%rdi),%r9
|
movq 8(%rdi),%r9
|
||||||
@ -140,7 +146,7 @@ ALIGN_TEXT
|
|||||||
*
|
*
|
||||||
* Before we compute it we narrow down the range (16 -> 8 -> 4 bytes).
|
* Before we compute it we narrow down the range (16 -> 8 -> 4 bytes).
|
||||||
*/
|
*/
|
||||||
ALIGN_TEXT
|
ALIGN_TEXT
|
||||||
10320016:
|
10320016:
|
||||||
leaq 16(%rdi),%rdi
|
leaq 16(%rdi),%rdi
|
||||||
leaq 16(%rsi),%rsi
|
leaq 16(%rsi),%rsi
|
||||||
@ -152,29 +158,29 @@ ALIGN_TEXT
|
|||||||
leaq 8(%rdi),%rdi
|
leaq 8(%rdi),%rdi
|
||||||
leaq 8(%rsi),%rsi
|
leaq 8(%rsi),%rsi
|
||||||
jmp 80f
|
jmp 80f
|
||||||
ALIGN_TEXT
|
ALIGN_TEXT
|
||||||
10081608:
|
10081608:
|
||||||
10163224:
|
10163224:
|
||||||
leaq -8(%rdi,%rdx),%rdi
|
leaq -8(%rdi,%rdx),%rdi
|
||||||
leaq -8(%rsi,%rdx),%rsi
|
leaq -8(%rsi,%rdx),%rsi
|
||||||
jmp 80f
|
jmp 80f
|
||||||
ALIGN_TEXT
|
ALIGN_TEXT
|
||||||
10163216:
|
10163216:
|
||||||
leaq -16(%rdi,%rdx),%rdi
|
leaq -16(%rdi,%rdx),%rdi
|
||||||
leaq -16(%rsi,%rdx),%rsi
|
leaq -16(%rsi,%rdx),%rsi
|
||||||
jmp 80f
|
jmp 80f
|
||||||
ALIGN_TEXT
|
ALIGN_TEXT
|
||||||
10163208:
|
10163208:
|
||||||
leaq 8(%rdi),%rdi
|
leaq 8(%rdi),%rdi
|
||||||
leaq 8(%rsi),%rsi
|
leaq 8(%rsi),%rsi
|
||||||
jmp 80f
|
jmp 80f
|
||||||
ALIGN_TEXT
|
ALIGN_TEXT
|
||||||
10040804:
|
10040804:
|
||||||
leaq -4(%rdi,%rdx),%rdi
|
leaq -4(%rdi,%rdx),%rdi
|
||||||
leaq -4(%rsi,%rdx),%rsi
|
leaq -4(%rsi,%rdx),%rsi
|
||||||
jmp 1f
|
jmp 1f
|
||||||
|
|
||||||
ALIGN_TEXT
|
ALIGN_TEXT
|
||||||
80:
|
80:
|
||||||
movl (%rdi),%r8d
|
movl (%rdi),%r8d
|
||||||
movl (%rsi),%r9d
|
movl (%rsi),%r9d
|
||||||
|
@ -93,9 +93,26 @@ ENTRY(memcmp)
|
|||||||
cmpq $16,%rdx
|
cmpq $16,%rdx
|
||||||
ja 101632f
|
ja 101632f
|
||||||
|
|
||||||
100816:
|
|
||||||
cmpb $8,%dl
|
cmpb $8,%dl
|
||||||
jl 100408f
|
jg 100816f
|
||||||
|
|
||||||
|
cmpb $4,%dl
|
||||||
|
jg 100408f
|
||||||
|
|
||||||
|
cmpb $2,%dl
|
||||||
|
jge 100204f
|
||||||
|
|
||||||
|
cmpb $1,%dl
|
||||||
|
jl 100000f
|
||||||
|
movzbl (%rdi),%eax
|
||||||
|
movzbl (%rsi),%r8d
|
||||||
|
subl %r8d,%eax
|
||||||
|
100000:
|
||||||
|
POP_FRAME_POINTER
|
||||||
|
ret
|
||||||
|
|
||||||
|
ALIGN_TEXT
|
||||||
|
100816:
|
||||||
movq (%rdi),%r8
|
movq (%rdi),%r8
|
||||||
movq (%rsi),%r9
|
movq (%rsi),%r9
|
||||||
cmpq %r8,%r9
|
cmpq %r8,%r9
|
||||||
@ -106,9 +123,8 @@ ENTRY(memcmp)
|
|||||||
jne 10081608f
|
jne 10081608f
|
||||||
POP_FRAME_POINTER
|
POP_FRAME_POINTER
|
||||||
ret
|
ret
|
||||||
|
ALIGN_TEXT
|
||||||
100408:
|
100408:
|
||||||
cmpb $4,%dl
|
|
||||||
jl 100204f
|
|
||||||
movl (%rdi),%r8d
|
movl (%rdi),%r8d
|
||||||
movl (%rsi),%r9d
|
movl (%rsi),%r9d
|
||||||
cmpl %r8d,%r9d
|
cmpl %r8d,%r9d
|
||||||
@ -119,9 +135,8 @@ ENTRY(memcmp)
|
|||||||
jne 10040804f
|
jne 10040804f
|
||||||
POP_FRAME_POINTER
|
POP_FRAME_POINTER
|
||||||
ret
|
ret
|
||||||
|
ALIGN_TEXT
|
||||||
100204:
|
100204:
|
||||||
cmpb $2,%dl
|
|
||||||
jl 100001f
|
|
||||||
movzwl (%rdi),%r8d
|
movzwl (%rdi),%r8d
|
||||||
movzwl (%rsi),%r9d
|
movzwl (%rsi),%r9d
|
||||||
cmpl %r8d,%r9d
|
cmpl %r8d,%r9d
|
||||||
@ -132,16 +147,7 @@ ENTRY(memcmp)
|
|||||||
jne 1f
|
jne 1f
|
||||||
POP_FRAME_POINTER
|
POP_FRAME_POINTER
|
||||||
ret
|
ret
|
||||||
100001:
|
ALIGN_TEXT
|
||||||
cmpb $1,%dl
|
|
||||||
jl 100000f
|
|
||||||
movzbl (%rdi),%eax
|
|
||||||
movzbl (%rsi),%r8d
|
|
||||||
subl %r8d,%eax
|
|
||||||
100000:
|
|
||||||
POP_FRAME_POINTER
|
|
||||||
ret
|
|
||||||
ALIGN_TEXT
|
|
||||||
101632:
|
101632:
|
||||||
cmpq $32,%rdx
|
cmpq $32,%rdx
|
||||||
ja 103200f
|
ja 103200f
|
||||||
@ -163,7 +169,7 @@ ALIGN_TEXT
|
|||||||
jne 10163224f
|
jne 10163224f
|
||||||
POP_FRAME_POINTER
|
POP_FRAME_POINTER
|
||||||
ret
|
ret
|
||||||
ALIGN_TEXT
|
ALIGN_TEXT
|
||||||
103200:
|
103200:
|
||||||
movq (%rdi),%r8
|
movq (%rdi),%r8
|
||||||
movq 8(%rdi),%r9
|
movq 8(%rdi),%r9
|
||||||
@ -194,7 +200,7 @@ ALIGN_TEXT
|
|||||||
*
|
*
|
||||||
* Before we compute it we narrow down the range (16 -> 8 -> 4 bytes).
|
* Before we compute it we narrow down the range (16 -> 8 -> 4 bytes).
|
||||||
*/
|
*/
|
||||||
ALIGN_TEXT
|
ALIGN_TEXT
|
||||||
10320016:
|
10320016:
|
||||||
leaq 16(%rdi),%rdi
|
leaq 16(%rdi),%rdi
|
||||||
leaq 16(%rsi),%rsi
|
leaq 16(%rsi),%rsi
|
||||||
@ -206,29 +212,29 @@ ALIGN_TEXT
|
|||||||
leaq 8(%rdi),%rdi
|
leaq 8(%rdi),%rdi
|
||||||
leaq 8(%rsi),%rsi
|
leaq 8(%rsi),%rsi
|
||||||
jmp 80f
|
jmp 80f
|
||||||
ALIGN_TEXT
|
ALIGN_TEXT
|
||||||
10081608:
|
10081608:
|
||||||
10163224:
|
10163224:
|
||||||
leaq -8(%rdi,%rdx),%rdi
|
leaq -8(%rdi,%rdx),%rdi
|
||||||
leaq -8(%rsi,%rdx),%rsi
|
leaq -8(%rsi,%rdx),%rsi
|
||||||
jmp 80f
|
jmp 80f
|
||||||
ALIGN_TEXT
|
ALIGN_TEXT
|
||||||
10163216:
|
10163216:
|
||||||
leaq -16(%rdi,%rdx),%rdi
|
leaq -16(%rdi,%rdx),%rdi
|
||||||
leaq -16(%rsi,%rdx),%rsi
|
leaq -16(%rsi,%rdx),%rsi
|
||||||
jmp 80f
|
jmp 80f
|
||||||
ALIGN_TEXT
|
ALIGN_TEXT
|
||||||
10163208:
|
10163208:
|
||||||
leaq 8(%rdi),%rdi
|
leaq 8(%rdi),%rdi
|
||||||
leaq 8(%rsi),%rsi
|
leaq 8(%rsi),%rsi
|
||||||
jmp 80f
|
jmp 80f
|
||||||
ALIGN_TEXT
|
ALIGN_TEXT
|
||||||
10040804:
|
10040804:
|
||||||
leaq -4(%rdi,%rdx),%rdi
|
leaq -4(%rdi,%rdx),%rdi
|
||||||
leaq -4(%rsi,%rdx),%rsi
|
leaq -4(%rsi,%rdx),%rsi
|
||||||
jmp 1f
|
jmp 1f
|
||||||
|
|
||||||
ALIGN_TEXT
|
ALIGN_TEXT
|
||||||
80:
|
80:
|
||||||
movl (%rdi),%r8d
|
movl (%rdi),%r8d
|
||||||
movl (%rsi),%r9d
|
movl (%rsi),%r9d
|
||||||
|
Loading…
Reference in New Issue
Block a user