freebsd-dev/contrib/arm-optimized-routines/string/arm/strcpy.c
Alex Richardson 31914882fc Import Arm Optimized Routines v21.02
This is the new replacement for the existing cortex-strings code which will
be replaced in a follow-up commit.
We should also be able to use some of the math functions to allow the
tests to pass on AArch64 (and other architectures) instead of just x86.
We might also be able to reuse some of the tests for the kyua testsuite.

Imported using
```
curl -L e823e3abf5 | tar --strip-components=1 -xvzf -
git add .
```

Differential Revision: https://reviews.freebsd.org/D29035
git-subtree-dir: contrib/arm-optimized-routines
git-subtree-mainline: e34c713b0e
git-subtree-split: f9f37c002a
2021-07-06 11:05:34 +01:00

134 lines
3.0 KiB
C

/*
* strcpy
*
* Copyright (c) 2008-2020, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#if defined (__thumb2__) && !defined (__thumb__)
/* For GLIBC:
#include <string.h>
#include <memcopy.h>
#undef strcmp
*/
/* Operand text for the two constants used by the zero-byte test
   (w - 0x01010101) & ~w & 0x80808080.
   Without __thumb2__ the argument is stringified as a register name: magic1
   is that register and magic2 is the same register shifted left by 7.
   With __thumb2__ both constants are spelled as immediates and the argument
   is ignored.  */
#ifndef __thumb2__
#define magic1(reg) #reg
#define magic2(reg) #reg ", lsl #7"
#else
#define magic1(reg) "#0x01010101"
#define magic2(reg) "#0x80808080"
#endif
/* __strcpy_arm: copy the NUL-terminated string at SRC to DST, terminator
   included, and return DST.

   The function is declared naked, so the body below is the entire function:
   it reads dst from r0 and src from r1, performs its own stack saves and
   returns with BX LR.  r0 is never written, so it still holds dst on return.

   Register use:
     r1      running source pointer
     ip      running destination pointer (copy of r0)
     r2      scratch / zero-byte test result
     r3, r4  the two words in flight (r4 is saved on the stack)
     r5      only when __thumb2__ is not defined: holds 0x01010101
             (saved on the stack)

   Strategy:
     - src and dst differ in their low two address bits: byte loop (label 4).
     - same offset but not word aligned: copy a byte and/or a halfword to
       reach word alignment (label 3), then join the word path.
     - word path (label 5): copy word by word, testing each word for a zero
       byte with (w - 0x01010101) & ~w & 0x80808080, which is non-zero
       exactly when some byte of w is zero.  The word that contains the
       terminator is then stored byte by byte (second label 1).  */
char* __attribute__((naked))
__strcpy_arm (char* dst, const char* src)
{
__asm__ (
/* Preload hint for the start of the source.  */
"pld [r1, #0]\n\t"
/* r2 = dst ^ src: its low two bits are clear only if both pointers have
   the same offset from word alignment.  ip becomes the write pointer.  */
"eor r2, r0, r1\n\t"
"mov ip, r0\n\t"
"tst r2, #3\n\t"
"bne 4f\n\t"
/* Same offset; if that offset is non-zero, align first at label 3.  */
"tst r1, #3\n\t"
"bne 3f\n"
/* Word-aligned copy starts here.  */
"5:\n\t"
# ifndef __thumb2__
/* Save r5 and build 0x01010101 in it for magic1/magic2.  */
"str r5, [sp, #-4]!\n\t"
"mov r5, #0x01\n\t"
"orr r5, r5, r5, lsl #8\n\t"
"orr r5, r5, r5, lsl #16\n\t"
# endif
/* Save r4.  Load the first word; if bit 2 of the (pre-increment) source
   address is clear, go straight to the two-word loop.  The ldr does not
   alter the flags set by the tst.  */
"str r4, [sp, #-4]!\n\t"
"tst r1, #4\n\t"
"ldr r3, [r1], #4\n\t"
"beq 2f\n\t"
/* Otherwise handle one word on its own.  Zero-byte test on r3: the final
   tst leaves EQ set when r3 has no zero byte.  */
"sub r2, r3, "magic1(r5)"\n\t"
"bics r2, r2, r3\n\t"
"tst r2, "magic2(r5)"\n\t"
/* No terminator: store the word and fetch the next one.  Terminator
   present: go to the byte-wise tail with the word still in r3.  */
"itt eq\n\t"
"streq r3, [ip], #4\n\t"
"ldreq r3, [r1], #4\n"
"bne 1f\n\t"
/* Inner loop. We now know that r1 is 64-bit aligned, so we
can safely fetch up to two words. This allows us to avoid
load stalls. On entry r3 already holds the first word of the pair. */
".p2align 2\n"
"2:\n\t"
"pld [r1, #8]\n\t"
/* Fetch the second word of the pair into r4 before testing r3.  */
"ldr r4, [r1], #4\n\t"
"sub r2, r3, "magic1(r5)"\n\t"
"bics r2, r2, r3\n\t"
"tst r2, "magic2(r5)"\n\t"
/* Begin the test of r4; a plain sub leaves the flags from the tst of r3
   intact for the branch that follows.  */
"sub r2, r4, "magic1(r5)"\n\t"
"bne 1f\n\t"
/* r3 has no terminator: store it, then finish the test of r4.  */
"str r3, [ip], #4\n\t"
"bics r2, r2, r4\n\t"
"tst r2, "magic2(r5)"\n\t"
/* r4 clean as well: load the next first word, store r4 and loop.  */
"itt eq\n\t"
"ldreq r3, [r1], #4\n\t"
"streq r4, [ip], #4\n\t"
"beq 2b\n\t"
/* r4 holds the terminator: the tail expects the word in r3.  */
"mov r3, r4\n"
/* Tail: store r3 one byte at a time in memory order, stopping after the
   zero byte has been written.  */
"1:\n\t"
# ifdef __ARMEB__
/* Big-endian: rotate the next byte down into bits 7..0 before storing.
   The flags written by rors are replaced by the tst below.  */
"rors r3, r3, #24\n\t"
# endif
"strb r3, [ip], #1\n\t"
"tst r3, #0xff\n\t"
# ifdef __ARMEL__
/* Little-endian: bring the next byte into bits 7..0.  ror does not touch
   the flags, so bne still sees the tst result.  */
"ror r3, r3, #8\n\t"
# endif
"bne 1b\n\t"
/* Restore the registers saved at label 5 and return.  */
"ldr r4, [sp], #4\n\t"
# ifndef __thumb2__
"ldr r5, [sp], #4\n\t"
# endif
"BX LR\n"
/* Strings have the same offset from word alignment, but it's
not zero. Nothing has been pushed on the stack yet on this path, so the
early returns below do not restore anything. */
"3:\n\t"
/* Odd source address: copy one byte, returning if it was the NUL.  */
"tst r1, #1\n\t"
"beq 1f\n\t"
"ldrb r2, [r1], #1\n\t"
"strb r2, [ip], #1\n\t"
"cmp r2, #0\n\t"
"it eq\n"
"BXEQ LR\n"
"1:\n\t"
/* Bit 1 clear means the source is now word aligned: join the word path.
   Otherwise copy one halfword.  */
"tst r1, #2\n\t"
"beq 5b\n\t"
"ldrh r2, [r1], #2\n\t"
/* NOTE(review): strneh/streqb are pre-UAL spellings (UAL writes strhne /
   strbeq); confirm the assembler mode in use accepts them.  */
# ifdef __ARMEB__
/* Big-endian: the first character is in bits 15..8.  If it is non-zero,
   store the halfword and test the second character; if it is zero, store
   just that zero byte.  */
"tst r2, #0xff00\n\t"
"iteet ne\n\t"
"strneh r2, [ip], #2\n\t"
"lsreq r2, r2, #8\n\t"
"streqb r2, [ip]\n\t"
"tstne r2, #0xff\n\t"
# else
/* Little-endian: the first character is in bits 7..0.  If it is non-zero,
   store the halfword and test the second character; if it is zero, store
   just that zero byte.  */
"tst r2, #0xff\n\t"
"itet ne\n\t"
"strneh r2, [ip], #2\n\t"
"streqb r2, [ip]\n\t"
"tstne r2, #0xff00\n\t"
# endif
/* NE here means both characters were non-zero: continue with the word
   path.  Otherwise the terminator has been written, so return.  */
"bne 5b\n\t"
"BX LR\n"
/* src and dst do not have a common word-alignment. Fall back to
byte copying. */
"4:\n\t"
"ldrb r2, [r1], #1\n\t"
"strb r2, [ip], #1\n\t"
"cmp r2, #0\n\t"
"bne 4b\n\t"
"BX LR");
}
/* For GLIBC: libc_hidden_builtin_def (strcpy) */
#endif /* defined (__thumb2__) && !defined (__thumb__) */