diff --git a/lib/libc/amd64/string/Makefile.inc b/lib/libc/amd64/string/Makefile.inc
index 4432e59c2f22..5395ed30772a 100644
--- a/lib/libc/amd64/string/Makefile.inc
+++ b/lib/libc/amd64/string/Makefile.inc
@@ -1,4 +1,4 @@
 # $FreeBSD$
 
 MDSRCS+=	bcmp.S bcopy.S bzero.S memcmp.S memcpy.S memmove.S memset.S \
-	strcmp.S
+	strcmp.S strcpy.S
diff --git a/lib/libc/amd64/string/strcpy.S b/lib/libc/amd64/string/strcpy.S
new file mode 100644
index 000000000000..c67406dff550
--- /dev/null
+++ b/lib/libc/amd64/string/strcpy.S
@@ -0,0 +1,136 @@
+/*
+ * Written by J.T. Conklin <jtc@acorntoolworks.com>
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+#if 0
+	RCSID("$NetBSD: strcpy.S,v 1.3 2004/07/19 20:04:41 drochner Exp $")
+#endif
+
+/*
+ * char *strcpy(char *dst, const char *src)
+ *
+ * This strcpy implementation copies a byte at a time until the
+ * source pointer is aligned to a word boundary, it then copies by
+ * words until it finds a word containing a zero byte, and finally
+ * copies by bytes until the end of the string is reached.
+ *
+ * While this may result in unaligned stores if the source and
+ * destination pointers are unaligned with respect to each other,
+ * it is still faster than either byte copies or the overhead of
+ * an implementation suitable for machines with strict alignment
+ * requirements.
+ *
+ * Register usage:
+ *	%rdi	destination cursor (dst on entry)
+ *	%rsi	source cursor (src on entry)
+ *	%rax	return value (the original dst)
+ *	%rdx	byte or word most recently loaded from the source
+ *	%rcx	scratch for the zero-byte test
+ *	%r8	0x0101010101010101, subtracted in the zero-byte test
+ *	%r9	0x8080808080808080, mask for the zero-byte test
+ */
+
+ENTRY(strcpy)
+	movq	%rdi,%rax
+	movabsq	$0x0101010101010101,%r8
+	movabsq	$0x8080808080808080,%r9
+
+	/*
+	 * Align source to a word boundary.
+	 * Consider unrolling loop?
+	 *
+	 * .p2align 4 requests 16-byte alignment; on ELF targets a bare
+	 * ".align 4" would only align to 4 bytes.
+	 */
+	.p2align 4
+.Lalign:
+	testb	$7,%sil
+	je	.Lword_aligned
+	movb	(%rsi),%dl
+	incq	%rsi
+	movb	%dl,(%rdi)
+	incq	%rdi
+	testb	%dl,%dl
+	jne	.Lalign
+	ret			/* copied the terminator while aligning */
+
+	/*
+	 * Copy a word at a time.  Subtracting 0x0101010101010101 and
+	 * masking with 0x8080808080808080 gives a nonzero result whenever
+	 * the word contains a zero byte.  The store is deferred to the
+	 * top of the next iteration so that only words that passed the
+	 * test are written whole.
+	 */
+	.p2align 4
+.Lloop:
+	movq	%rdx,(%rdi)
+	addq	$8,%rdi
+.Lword_aligned:
+	movq	(%rsi),%rdx
+	movq	%rdx,%rcx
+	addq	$8,%rsi
+	subq	%r8,%rcx
+	testq	%r9,%rcx
+	je	.Lloop
+
+	/*
+	 * In rare cases, the above loop may exit prematurely. We must
+	 * return to the loop if none of the bytes in the word equal 0.
+	 * (A byte of 0x81 or above trips the test just as a zero byte
+	 * does.)  Copy the word out of %rdx one byte at a time, lowest
+	 * byte first, stopping after the terminator.
+	 */
+
+	movb	%dl,(%rdi)
+	incq	%rdi
+	testb	%dl,%dl		/* 1st byte == 0? */
+	je	.Ldone
+
+	shrq	$8,%rdx
+	movb	%dl,(%rdi)
+	incq	%rdi
+	testb	%dl,%dl		/* 2nd byte == 0? */
+	je	.Ldone
+
+	shrq	$8,%rdx
+	movb	%dl,(%rdi)
+	incq	%rdi
+	testb	%dl,%dl		/* 3rd byte == 0? */
+	je	.Ldone
+
+	shrq	$8,%rdx
+	movb	%dl,(%rdi)
+	incq	%rdi
+	testb	%dl,%dl		/* 4th byte == 0? */
+	je	.Ldone
+
+	shrq	$8,%rdx
+	movb	%dl,(%rdi)
+	incq	%rdi
+	testb	%dl,%dl		/* 5th byte == 0? */
+	je	.Ldone
+
+	shrq	$8,%rdx
+	movb	%dl,(%rdi)
+	incq	%rdi
+	testb	%dl,%dl		/* 6th byte == 0? */
+	je	.Ldone
+
+	shrq	$8,%rdx
+	movb	%dl,(%rdi)
+	incq	%rdi
+	testb	%dl,%dl		/* 7th byte == 0? */
+	je	.Ldone
+
+	shrq	$8,%rdx
+	movb	%dl,(%rdi)
+	incq	%rdi
+	testb	%dl,%dl		/* 8th byte == 0? */
+	jne	.Lword_aligned
+
+.Ldone:
+	ret