[PPC64] memcpy/memmove/bcopy optimization

For copies shorter than 512 bytes, the data is copied using plain
ld/std instructions.
For 512 bytes or more, the copy is done in 3 phases:

Phase 1: copy from the src buffer until it's aligned at a 16-byte boundary
Phase 2: copy as many aligned 64-byte blocks from the src buffer as possible
Phase 3: copy the remaining data, if any

In phase 2, this code uses VSX instructions when available. Otherwise,
it uses ldx/stdx.
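
As a rough C sketch of the multi-phase forward path (illustrative only; the committed code below is hand-written assembly and also handles overlapping/backward copies):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define BLOCK_SIZE 64

static void *
multi_phase_copy(void *dst, const void *src, size_t len)
{
	uint8_t *d = dst;
	const uint8_t *s = src;
	size_t head, i;

	/* Phase 1: byte copies until src reaches a 16-byte boundary
	 * (16 bytes are copied even if src is already aligned, matching
	 * the assembly's setup). */
	head = 16 - ((uintptr_t)s & 15);
	for (i = 0; i < head; i++)
		d[i] = s[i];
	d += head; s += head; len -= head;

	/* Phase 2: aligned 64-byte blocks; the assembly uses eight
	 * ldx/stdx pairs or four lxvd2x/stxvd2x pairs per block. */
	while (len >= BLOCK_SIZE) {
		memcpy(d, s, BLOCK_SIZE);	/* stand-in for the unrolled block copy */
		d += BLOCK_SIZE; s += BLOCK_SIZE; len -= BLOCK_SIZE;
	}

	/* Phase 3: remaining tail bytes, if any. */
	for (i = 0; i < len; i++)
		d[i] = s[i];

	return (dst);
}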

Submitted by:	Luis Pires <lffpires_ruabrasil.org> (original version)
Reviewed by:	jhibbits
Differential Revision:	https://reviews.freebsd.org/D15118
Author:	Leandro Lupori
Date:	2020-01-15 20:25:52 +00:00
Commit:	e16c18650c (parent 181e35008c)
Notes:	svn2git 2020-12-20 02:59:44 +00:00
	svn path=/head/; revision=356767

10 changed files with 648 additions and 0 deletions

Makefile.inc

@@ -1,6 +1,15 @@
# $FreeBSD$
MDSRCS+= \
bcopy.S \
bcopy_vsx.S \
bcopy_resolver.c \
memcpy.S \
memcpy_vsx.S \
memcpy_resolver.c \
memmove.S \
memmove_vsx.S \
memmove_resolver.c \
strcpy_arch_2_05.S \
strcpy.c \
strcpy_resolver.c \

bcopy.S

@@ -0,0 +1,306 @@
/*-
* Copyright (c) 2018 Instituto de Pesquisas Eldorado
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the author nor the names of its contributors may
* be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <machine/asm.h>
__FBSDID("$FreeBSD$");
#define BLOCK_SIZE_BITS 6
#define BLOCK_SIZE (1 << BLOCK_SIZE_BITS)
#define BLOCK_SIZE_MASK (BLOCK_SIZE - 1)
#define MULTI_PHASE_THRESHOLD 512
#ifndef FN_NAME
#ifdef MEMMOVE
#define FN_NAME __memmove
WEAK_REFERENCE(__memmove, memmove);
#else
#define FN_NAME __bcopy
WEAK_REFERENCE(__bcopy, bcopy);
#endif
#endif
/*
* r3: dst
* r4: src
* r5: len
*/
ENTRY(FN_NAME)
cmpld %r3, %r4 /* src == dst? nothing to do */
beqlr-
cmpdi %r5, 0 /* len == 0? nothing to do */
beqlr-
#ifdef MEMMOVE
std %r3, -8(%r1) /* save dst */
#else /* bcopy: swap src/dst */
mr %r0, %r3
mr %r3, %r4
mr %r4, %r0
#endif
cmpldi %r5, MULTI_PHASE_THRESHOLD
bge .Lmulti_phase
/* align src */
cmpd %r4, %r3 /* forward or backward copy? */
blt .Lbackward_align
.align 5
.Lalign:
andi. %r0, %r4, 15
beq .Lsingle_copy
lbz %r0, 0(%r4)
addi %r4, %r4, 1
stb %r0, 0(%r3)
addi %r3, %r3, 1
addi %r5, %r5, -1
cmpdi %r5, 0
beq- .Ldone
b .Lalign
.Lbackward_align:
/* advance src and dst to end (past last byte) */
add %r3, %r3, %r5
add %r4, %r4, %r5
.align 5
.Lbackward_align_loop:
andi. %r0, %r4, 15
beq .Lbackward_single_copy
lbzu %r0, -1(%r4)
addi %r5, %r5, -1
stbu %r0, -1(%r3)
cmpdi %r5, 0
beq- .Ldone
b .Lbackward_align_loop
.Lsingle_copy:
/* forward copy */
li %r0, 1
li %r8, 16
li %r9, 0
b .Lsingle_phase
.Lbackward_single_copy:
/* backward copy */
li %r0, -1
li %r8, -16
li %r9, -15
/* point src and dst to last byte */
addi %r3, %r3, -1
addi %r4, %r4, -1
.Lsingle_phase:
srdi. %r6, %r5, 4 /* number of 16-bytes */
beq .Lsingle_1
/* pre-adjustment */
add %r3, %r3, %r9
add %r4, %r4, %r9
mtctr %r6
.align 5
.Lsingle_16_loop:
ld %r6, 0(%r4)
ld %r7, 8(%r4)
add %r4, %r4, %r8
std %r6, 0(%r3)
std %r7, 8(%r3)
add %r3, %r3, %r8
bdnz .Lsingle_16_loop
/* post-adjustment */
sub %r3, %r3, %r9
sub %r4, %r4, %r9
.Lsingle_1:
andi. %r6, %r5, 0x0f /* number of 1-bytes */
beq .Ldone /* 1-bytes == 0? done */
mtctr %r6
.align 5
.Lsingle_1_loop:
lbz %r6, 0(%r4)
add %r4, %r4, %r0 /* increment */
stb %r6, 0(%r3)
add %r3, %r3, %r0 /* increment */
bdnz .Lsingle_1_loop
.Ldone:
#ifdef MEMMOVE
ld %r3, -8(%r1) /* restore dst */
#endif
blr
.Lmulti_phase:
/* set up multi-phase copy parameters */
/* r7 = bytes before the aligned section of the buffer */
andi. %r6, %r4, 15
subfic %r7, %r6, 16
/* r8 = bytes in and after the aligned section of the buffer */
sub %r8, %r5, %r7
/* r9 = bytes after the aligned section of the buffer */
andi. %r9, %r8, BLOCK_SIZE_MASK
/* r10 = BLOCKS in the aligned section of the buffer */
srdi %r10, %r8, BLOCK_SIZE_BITS
/* forward or backward copy? */
cmpd %r4, %r3
blt .Lbackward_multi_copy
/* set up forward copy parameters */
std %r7, -32(%r1) /* bytes to copy in phase 1 */
std %r10, -40(%r1) /* BLOCKS to copy in phase 2 */
std %r9, -48(%r1) /* bytes to copy in phase 3 */
li %r0, 1 /* increment for phases 1 and 3 */
li %r5, BLOCK_SIZE /* increment for phase 2 */
/* op offsets for phase 2 */
li %r7, 0
li %r8, 16
li %r9, 32
li %r10, 48
std %r8, -16(%r1) /* 16-byte increment (16) */
std %r7, -24(%r1) /* 16-byte pre/post adjustment (0) */
b .Lphase1
.Lbackward_multi_copy:
/* set up backward copy parameters */
std %r9, -32(%r1) /* bytes to copy in phase 1 */
std %r10, -40(%r1) /* BLOCKS to copy in phase 2 */
std %r7, -48(%r1) /* bytes to copy in phase 3 */
li %r0, -1 /* increment for phases 1 and 3 */
add %r6, %r5, %r0 /* r6 = len - 1 */
li %r5, -BLOCK_SIZE /* increment for phase 2 */
/* advance src and dst to the last position */
add %r3, %r3, %r6
add %r4, %r4, %r6
/* op offsets for phase 2 */
li %r7, -15
li %r8, -31
li %r9, -47
li %r10, -63
add %r6, %r7, %r0 /* r6 = -16 */
std %r6, -16(%r1) /* 16-byte increment (-16) */
std %r7, -24(%r1) /* 16-byte pre/post adjustment (-15) */
.Lphase1:
ld %r6, -32(%r1) /* bytes to copy in phase 1 */
cmpldi %r6, 0 /* r6 == 0? skip phase 1 */
beq+ .Lphase2
mtctr %r6
.align 5
.Lphase1_loop:
lbz %r6, 0(%r4)
add %r4, %r4, %r0 /* phase 1 increment */
stb %r6, 0(%r3)
add %r3, %r3, %r0 /* phase 1 increment */
bdnz .Lphase1_loop
.Lphase2:
ld %r6, -40(%r1) /* BLOCKS to copy in phase 2 */
cmpldi %r6, 0 /* %r6 == 0? skip phase 2 */
beq .Lphase3
#ifdef FN_PHASE2
FN_PHASE2
#else
/* save registers */
std %r14, -56(%r1)
std %r15, -64(%r1)
std %r16, -72(%r1)
std %r17, -80(%r1)
std %r18, -88(%r1)
std %r19, -96(%r1)
std %r20, -104(%r1)
std %r21, -112(%r1)
addi %r18, %r7, 8
addi %r19, %r8, 8
addi %r20, %r9, 8
addi %r21, %r10, 8
mtctr %r6
.align 5
.Lphase2_loop:
ldx %r14, %r7, %r4
ldx %r15, %r18, %r4
ldx %r16, %r8, %r4
ldx %r17, %r19, %r4
stdx %r14, %r7, %r3
stdx %r15, %r18, %r3
stdx %r16, %r8, %r3
stdx %r17, %r19, %r3
ldx %r14, %r9, %r4
ldx %r15, %r20, %r4
ldx %r16, %r10, %r4
ldx %r17, %r21, %r4
stdx %r14, %r9, %r3
stdx %r15, %r20, %r3
stdx %r16, %r10, %r3
stdx %r17, %r21, %r3
add %r4, %r4, %r5 /* phase 2 increment */
add %r3, %r3, %r5 /* phase 2 increment */
bdnz .Lphase2_loop
/* restore registers */
ld %r14, -56(%r1)
ld %r15, -64(%r1)
ld %r16, -72(%r1)
ld %r17, -80(%r1)
ld %r18, -88(%r1)
ld %r19, -96(%r1)
ld %r20, -104(%r1)
ld %r21, -112(%r1)
#endif
.Lphase3:
/* load registers for transitioning into the single-phase logic */
ld %r5, -48(%r1) /* bytes to copy in phase 3 */
ld %r8, -16(%r1) /* 16-byte increment */
ld %r9, -24(%r1) /* 16-byte pre/post adjustment */
b .Lsingle_phase
END(FN_NAME)
.section .note.GNU-stack,"",%progbits

bcopy_resolver.c

@@ -0,0 +1,68 @@
/*-
* Copyright (c) 2018 Instituto de Pesquisas Eldorado
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the author nor the names of its contributors may
* be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <machine/cpu.h>
#include <machine/ifunc.h>
#define _CAT(a,b) a##b
#define CAT(a,b) _CAT(a,b)
#define CAT3(a,b,c) CAT(CAT(a,b),c)
#ifdef MEMCOPY
#define FN_NAME memcpy
#define FN_RET void *
#define FN_PARAMS (void *dst, const void *src, size_t len)
#elif defined(MEMMOVE)
#define FN_NAME memmove
#define FN_RET void *
#define FN_PARAMS (void *dst, const void *src, size_t len)
#else
#define FN_NAME bcopy
#define FN_RET void
#define FN_PARAMS (const void *src, void *dst, size_t len)
#endif
#define FN_NAME_NOVSX CAT(__, FN_NAME)
#define FN_NAME_VSX CAT3(__, FN_NAME, _vsx)
FN_RET FN_NAME_NOVSX FN_PARAMS;
FN_RET FN_NAME_VSX FN_PARAMS;
DEFINE_UIFUNC(, FN_RET, FN_NAME, FN_PARAMS)
{
if (cpu_features & PPC_FEATURE_HAS_VSX)
return (FN_NAME_VSX);
else
return (FN_NAME_NOVSX);
}
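
The resolver above runs once, when the dynamic linker binds the ifunc symbol. As a rough plain-C analogue (hypothetical names, not the actual FreeBSD mechanism), the effect is equivalent to choosing a function pointer once and calling through it afterwards:

#include <stddef.h>
#include <string.h>

/* Hypothetical stand-ins for the generic and VSX implementations that the
 * resolver selects between. */
static void *
memcpy_novsx(void *dst, const void *src, size_t len)
{
	return (memcpy(dst, src, len));
}

static void *
memcpy_vsx(void *dst, const void *src, size_t len)
{
	return (memcpy(dst, src, len));
}

/* The choice is made a single time, based on a CPU feature bit; all later
 * calls go through the stored pointer with no per-call branch. */
static void *(*memcpy_impl)(void *, const void *, size_t);

static void
memcpy_pick(int has_vsx)
{
	memcpy_impl = has_vsx ? memcpy_vsx : memcpy_novsx;
}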

bcopy_vsx.S

@@ -0,0 +1,61 @@
/*-
* Copyright (c) 2018 Instituto de Pesquisas Eldorado
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the author nor the names of its contributors may
* be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef FN_NAME
#define FN_NAME __bcopy_vsx
#endif
/*
* r3: dst
* r4: src
* r5: block increment
* r6: blocks to copy
* r7/r8/r9/r10: 16-byte offsets to copy
*/
#define FN_PHASE2 \
mtctr %r6 ;\
.align 5 ;\
.Lphase2_loop: ;\
lxvd2x %vs6, %r7, %r4 ;\
lxvd2x %vs7, %r8, %r4 ;\
lxvd2x %vs8, %r9, %r4 ;\
lxvd2x %vs9, %r10, %r4 ;\
stxvd2x %vs6, %r7, %r3 ;\
stxvd2x %vs7, %r8, %r3 ;\
stxvd2x %vs8, %r9, %r3 ;\
stxvd2x %vs9, %r10, %r3 ;\
/* phase 2 increment */ ;\
add %r4, %r4, %r5 ;\
add %r3, %r3, %r5 ;\
\
bdnz .Lphase2_loop ;\

#include "bcopy.S"

memcpy.S

@@ -0,0 +1,122 @@
/*-
* Copyright (c) 2018 Instituto de Pesquisas Eldorado
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the author nor the names of its contributors may
* be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <machine/asm.h>
__FBSDID("$FreeBSD$");
#ifndef FN_NAME
#define FN_NAME __memcpy
WEAK_REFERENCE(__memcpy, memcpy);
#define BLOCK_BITS 4
#endif
#define BLOCK_BYTES (1 << BLOCK_BITS)
#define BLOCK_MASK (BLOCK_BYTES - 1)
/*
* r3: dst
* r4: src
* r5: len
*/
ENTRY(FN_NAME)
cmpdi %r5, 0 /* len == 0? nothing to do */
beqlr-
mr %r8, %r3 /* save dst */
/* align src */
.Lalignment_loop:
lbz %r6, 0(%r4)
stb %r6, 0(%r3)
addi %r3, %r3, 1
addi %r4, %r4, 1
addi %r5, %r5, -1
cmpdi %r5, 0
beq .Lexit
andi. %r0, %r4, BLOCK_MASK
bne .Lalignment_loop
/* r7 = remaining, non-block, bytes */
andi. %r7, %r5, BLOCK_MASK
/* Check if there are blocks of BLOCK_BYTES to be copied */
xor. %r5, %r5, %r7
beq .Lcopy_remaining_fix_index_byte
#ifdef FN_COPY_LOOP
FN_COPY_LOOP
#else
/* Set up to copy words with ldu and stdu */
ld %r6, 0(%r4)
ld %r9, 8(%r4)
std %r6, 0(%r3)
std %r9, 8(%r3)
addi %r5, %r5, -BLOCK_BYTES
cmpd %r5, 0
beq .Lcopy_remaining_fix_index_word
srdi %r5, %r5, BLOCK_BITS
mtctr %r5
.Lcopy_word:
ldu %r6, 16(%r4)
ld %r9, 8(%r4)
stdu %r6, 16(%r3)
std %r9, 8(%r3)
bdnz .Lcopy_word
.Lcopy_remaining_fix_index_word:
/* Check if there are remaining bytes */
cmpd %r7, 0
beq .Lexit
addi %r3, %r3, BLOCK_MASK
addi %r4, %r4, BLOCK_MASK
b .Lcopy_remaining
#endif
.Lcopy_remaining_fix_index_byte:
addi %r4, %r4, -1
addi %r3, %r3, -1
/* Copy remaining bytes */
.Lcopy_remaining:
mtctr %r7
.Lcopy_remaining_loop:
lbzu %r6, 1(%r4)
stbu %r6, 1(%r3)
bdnz .Lcopy_remaining_loop
.Lexit:
/* Restore dst */
mr %r3, %r8
blr
END(FN_NAME)
.section .note.GNU-stack,"",%progbits

memcpy_resolver.c

@@ -0,0 +1,4 @@
/* $FreeBSD$ */
#define MEMCOPY
#include "bcopy_resolver.c"

memcpy_vsx.S

@@ -0,0 +1,65 @@
/*-
* Copyright (c) 2018 Instituto de Pesquisas Eldorado
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the author nor the names of its contributors may
* be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#define FN_NAME __memcpy_vsx
#define BLOCK_BITS 6
/*
* r5: bytes to copy (multiple of BLOCK_BYTES)
*
*/
#define FN_COPY_LOOP \
/* Load CTR with number of blocks */ \
srdi %r5, %r5, BLOCK_BITS ;\
mtctr %r5 ;\
/* Prepare indexes to load and store data */ \
xor %r6, %r6, %r6 ;\
li %r9, 16 ;\
li %r10, 32 ;\
li %r11, 48 ;\
.Lcopy_vsx_loop: \
lxvd2x %vs6, %r6, %r4 ;\
lxvd2x %vs7, %r9, %r4 ;\
lxvd2x %vs8, %r10, %r4 ;\
lxvd2x %vs9, %r11, %r4 ;\
stxvd2x %vs6, %r6, %r3 ;\
stxvd2x %vs7, %r9, %r3 ;\
stxvd2x %vs8, %r10, %r3 ;\
stxvd2x %vs9, %r11, %r3 ;\
\
addi %r3, %r3, BLOCK_BYTES ;\
addi %r4, %r4, BLOCK_BYTES ;\
bdnz .Lcopy_vsx_loop ;\
\
/* Check if there are remaining bytes */ \
cmpd %r7, 0 ;\
beq .Lexit ;\

#include "memcpy.S"

memmove.S

@@ -0,0 +1,4 @@
/* $FreeBSD$ */
#define MEMMOVE
#include "bcopy.S"

memmove_resolver.c

@@ -0,0 +1,4 @@
/* $FreeBSD$ */
#define MEMMOVE
#include "bcopy_resolver.c"

memmove_vsx.S

@@ -0,0 +1,5 @@
/* $FreeBSD$ */
#define MEMMOVE
#define FN_NAME __memmove_vsx
#include "bcopy_vsx.S"