Add a machine-specific, optimized implementation of strcmp.

PR: 73111
Submitted by: Ville-Pertti Keinonen <will@iki.fi> (taken from NetBSD)
MFC after: 3 weeks
This commit is contained in:
alc 2005-04-09 20:47:08 +00:00
parent e401f53792
commit 654c522ae8
2 changed files with 75 additions and 1 deletions

View File

@ -1,3 +1,4 @@
# $FreeBSD$
MDSRCS+= bcmp.S bcopy.S bzero.S memcmp.S memcpy.S memmove.S memset.S
MDSRCS+= bcmp.S bcopy.S bzero.S memcmp.S memcpy.S memmove.S memset.S \
strcmp.S

View File

@ -0,0 +1,73 @@
/*
* Written by J.T. Conklin <jtc@acorntoolworks.com>
* Public domain.
*/
#include <machine/asm.h>
__FBSDID("$FreeBSD$");
#if 0
RCSID("$NetBSD: strcmp.S,v 1.3 2004/07/19 20:04:41 drochner Exp $")
#endif
ENTRY(strcmp)
/*
* Align s1 to word boundary.
* Consider unrolling loop?
*/
.Ls1align:
testb $7,%dil
je .Ls1aligned
movb (%rdi),%al
incq %rdi
movb (%rsi),%dl
incq %rsi
testb %al,%al
je .Ldone
cmpb %al,%dl
je .Ls1align
jmp .Ldone
/*
* Check whether s2 is aligned to a word boundry. If it is, we
* can compare by words. Otherwise we have to compare by bytes.
*/
.Ls1aligned:
testb $7,%sil
jne .Lbyte_loop
movabsq $0x0101010101010101,%r8
subq $8,%rdi
movabsq $0x8080808080808080,%r9
subq $8,%rsi
.align 4
.Lword_loop:
movq 8(%rdi),%rax
addq $8,%rdi
movq 8(%rsi),%rdx
addq $8,%rsi
cmpq %rax,%rdx
jne .Lbyte_loop
subq %r8,%rdx
notq %rax
andq %rax,%rdx
testq %r9,%rdx
je .Lword_loop
.align 4
.Lbyte_loop:
movb (%rdi),%al
incq %rdi
movb (%rsi),%dl
incq %rsi
testb %al,%al
je .Ldone
cmpb %al,%dl
je .Lbyte_loop
.Ldone:
movzbq %al,%rax
movzbq %dl,%rdx
subq %rdx,%rax
ret