amd64: import asm strlen into libc

Reviewed by:	kib
Differential Revision:	https://reviews.freebsd.org/D28845
This commit is contained in:
Mateusz Guzik 2021-02-21 21:20:04 +00:00
parent 701d6b50ae
commit 7f06b217c5
2 changed files with 82 additions and 0 deletions

View File

@ -8,4 +8,5 @@ MDSRCS+= \
memset.S \
strcat.S \
strcmp.S \
strlen.S \
stpcpy.S

View File

@ -0,0 +1,81 @@
/*
* Written by Mateusz Guzik <mjg@freebsd.org>
* Public domain.
*/
#include <machine/asm.h>
__FBSDID("$FreeBSD$");
/*
* Note: this routine was written with kernel use in mind (read: no simd),
* it is only present in userspace as a temporary measure until something
* better gets imported.
*/
#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
/*
 * size_t strlen(const char *string)
 *
 * ABI:     System V AMD64
 * In:      %rdi = string
 * Out:     %rax = number of bytes preceding the first NUL byte
 * Scratch: %rcx, %rdx, %rdi, %r8, %r9, %r10, %r11, flags
 * Stack:   not used
 *
 * The string is examined one aligned 8-byte word at a time.  A word x
 * contains a zero byte iff
 *
 *	(x - 0x01....01) & ~x & 0x80....80
 *
 * is non-zero.  Instead of subtracting 0x01....01 the code adds
 * 0x0 - 0x01....01, so that a single leaq can form the difference in a
 * separate register.
 *
 * Bytes above the first zero byte may be flagged as well (the borrow
 * propagates upwards), but the lowest flagged byte is always a real NUL,
 * which is the only one the routine looks at.
 *
 * Because only aligned words are loaded, bytes in front of the string
 * (unaligned start) and behind the terminator may be read, but never
 * outside the aligned words that hold the first and last string bytes.
 *
 * For a description of the trick see either:
 * - "Hacker's Delight" by Henry S. Warren, Jr.
 * - "Optimizing subroutines in assembly language: An optimization guide
 *   for x86 platforms" by Agner Fog
 *
 * The latter contains a 32-bit variant of the same algorithm coded in
 * assembly for i386.
 *
 * Register roles:
 *	%r8	0x0 - 0x0101010101010101
 *	%r9	0x8080808080808080
 *	%r10	original string pointer
 *	%rdi	address of the aligned word being examined
 *	%r11	the word being examined, then its complement
 *	%rcx	shift count in the prologue, afterwards the zero-byte flags
 *	%rdx	mask covering the bytes in front of an unaligned string
 */
ENTRY(strlen)
	movabsq	$0x8080808080808080,%r9
	movabsq	$0xfefefefefefefeff,%r8
	movq	%rdi,%rcx
	movq	%rdi,%r10
	testb	$7,%dil
	jz	.Lscan_word		/* start is already 8-byte aligned */

	/*
	 * Unaligned start.  Build a mask with 0xff in every byte that
	 * precedes the string inside its first aligned word.  shlq only
	 * honours the low 6 bits of %cl, so the effective shift count is
	 * 8 * (string & 7).
	 */
	shlq	$3,%rcx
	movq	$-1,%rdx
	shlq	%cl,%rdx
	notq	%rdx

	/*
	 * Load the enclosing aligned word and overwrite the foreign bytes
	 * with 0xff so that they can never be mistaken for the terminator.
	 */
	andq	$~7,%rdi
	movq	(%rdi),%r11
	orq	%rdx,%r11

	/* Same zero-byte test as in the main loop. */
	leaq	(%r11,%r8),%rcx
	notq	%r11
	andq	%r11,%rcx
	andq	%r9,%rcx
	jnz	.Lfound_nul

	/*
	 * Main loop: advance one word at a time until the test reports a
	 * zero byte.  Aligned strings enter at .Lscan_word, skipping the
	 * initial pointer bump.
	 */
	ALIGN_TEXT
.Lnext_word:
	leaq	8(%rdi),%rdi
.Lscan_word:
	movq	(%rdi),%r11
	leaq	(%r11,%r8),%rcx		/* x - 0x01....01 */
	notq	%r11			/* ~x */
	andq	%r11,%rcx
	andq	%r9,%rcx		/* keep only the per-byte high bits */
	jz	.Lnext_word

	/*
	 * %rcx has bit 7 set in the byte lane of the first NUL.  Turn the
	 * bit index into a byte index, add the address of the current word
	 * and subtract the start of the string.
	 */
.Lfound_nul:
	bsfq	%rcx,%rcx
	shrq	$3,%rcx
	leaq	(%rdi,%rcx),%rax
	subq	%r10,%rax
	ret
END(strlen)
/*
 * Empty .note.GNU-stack section (no flags, in particular no "x"): the
 * GNU toolchain convention for marking this object as not needing an
 * executable stack.
 */
.section .note.GNU-stack,"",%progbits