289 lines
5.8 KiB
ArmAsm

/* $NetBSD: bcopy.S,v 1.3 1996/10/17 03:08:11 cgd Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Trevor Blackwell. Support for use as memcpy() and memmove()
* added by Chris Demetriou.
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
#include <machine/asm.h>
#if defined(MEMCOPY) || defined(MEMMOVE)
#ifdef MEMCOPY
#define FUNCTION memcpy
#else
#define FUNCTION memmove
#endif
#define SRCREG a1
#define DSTREG a0
#else /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
#define FUNCTION bcopy
#define SRCREG a0
#define DSTREG a1
#endif /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
#define SIZEREG a2
/*
* Copy bytes.
*
* void bcopy(char *from, char *to, size_t len);
* char *memcpy(void *to, const void *from, size_t len);
* char *memmove(void *to, const void *from, size_t len);
*
* No matter how invoked, the source and destination registers
* for calculation. There's no point in copying them to "working"
* registers, since the code uses their values "in place," and
* copying them would be slower.
*/
LEAF(FUNCTION,3)
#if defined(MEMCOPY) || defined(MEMMOVE)
/* set up return value, while we still can */
mov DSTREG,v0
#endif
/* Check for negative length */
ble SIZEREG,bcopy_done
/* Check for overlap */
subq DSTREG,SRCREG,t5
cmpult t5,SIZEREG,t5
bne t5,bcopy_overlap
/* a3 = end address */
addq SRCREG,SIZEREG,a3
/* Get the first word */
ldq_u t2,0(SRCREG)
/* Do they have the same alignment? */
xor SRCREG,DSTREG,t0
and t0,7,t0
and DSTREG,7,t1
bne t0,bcopy_different_alignment
/* src & dst have same alignment */
beq t1,bcopy_all_aligned
ldq_u t3,0(DSTREG)
addq SIZEREG,t1,SIZEREG
mskqh t2,SRCREG,t2
mskql t3,SRCREG,t3
or t2,t3,t2
/* Dst is 8-byte aligned */
bcopy_all_aligned:
/* If less than 8 bytes,skip loop */
subq SIZEREG,1,t0
and SIZEREG,7,SIZEREG
bic t0,7,t0
beq t0,bcopy_samealign_lp_end
bcopy_samealign_lp:
stq_u t2,0(DSTREG)
addq DSTREG,8,DSTREG
ldq_u t2,8(SRCREG)
subq t0,8,t0
addq SRCREG,8,SRCREG
bne t0,bcopy_samealign_lp
bcopy_samealign_lp_end:
/* If we're done, exit */
bne SIZEREG,bcopy_small_left
stq_u t2,0(DSTREG)
RET
bcopy_small_left:
mskql t2,SIZEREG,t4
ldq_u t3,0(DSTREG)
mskqh t3,SIZEREG,t3
or t4,t3,t4
stq_u t4,0(DSTREG)
RET
bcopy_different_alignment:
/*
* this is the fun part
*/
addq SRCREG,SIZEREG,a3
cmpule SIZEREG,8,t0
bne t0,bcopy_da_finish
beq t1,bcopy_da_noentry
/* Do the initial partial word */
subq zero,DSTREG,t0
and t0,7,t0
ldq_u t3,7(SRCREG)
extql t2,SRCREG,t2
extqh t3,SRCREG,t3
or t2,t3,t5
insql t5,DSTREG,t5
ldq_u t6,0(DSTREG)
mskql t6,DSTREG,t6
or t5,t6,t5
stq_u t5,0(DSTREG)
addq SRCREG,t0,SRCREG
addq DSTREG,t0,DSTREG
subq SIZEREG,t0,SIZEREG
ldq_u t2,0(SRCREG)
bcopy_da_noentry:
subq SIZEREG,1,t0
bic t0,7,t0
and SIZEREG,7,SIZEREG
beq t0,bcopy_da_finish2
bcopy_da_lp:
ldq_u t3,7(SRCREG)
addq SRCREG,8,SRCREG
extql t2,SRCREG,t4
extqh t3,SRCREG,t5
subq t0,8,t0
or t4,t5,t5
stq t5,0(DSTREG)
addq DSTREG,8,DSTREG
beq t0,bcopy_da_finish1
ldq_u t2,7(SRCREG)
addq SRCREG,8,SRCREG
extql t3,SRCREG,t4
extqh t2,SRCREG,t5
subq t0,8,t0
or t4,t5,t5
stq t5,0(DSTREG)
addq DSTREG,8,DSTREG
bne t0,bcopy_da_lp
bcopy_da_finish2:
/* Do the last new word */
mov t2,t3
bcopy_da_finish1:
/* Do the last partial word */
ldq_u t2,-1(a3)
extql t3,SRCREG,t3
extqh t2,SRCREG,t2
or t2,t3,t2
br zero,bcopy_samealign_lp_end
bcopy_da_finish:
/* Do the last word in the next source word */
ldq_u t3,-1(a3)
extql t2,SRCREG,t2
extqh t3,SRCREG,t3
or t2,t3,t2
insqh t2,DSTREG,t3
insql t2,DSTREG,t2
lda t4,-1(zero)
mskql t4,SIZEREG,t5
cmovne t5,t5,t4
insqh t4,DSTREG,t5
insql t4,DSTREG,t4
addq DSTREG,SIZEREG,a4
ldq_u t6,0(DSTREG)
ldq_u t7,-1(a4)
bic t6,t4,t6
bic t7,t5,t7
and t2,t4,t2
and t3,t5,t3
or t2,t6,t2
or t3,t7,t3
stq_u t3,-1(a4)
stq_u t2,0(DSTREG)
RET
bcopy_overlap:
/*
* Basically equivalent to previous case, only backwards.
* Not quite as highly optimized
*/
addq SRCREG,SIZEREG,a3
addq DSTREG,SIZEREG,a4
/* less than 8 bytes - don't worry about overlap */
cmpule SIZEREG,8,t0
bne t0,bcopy_ov_short
/* Possibly do a partial first word */
and a4,7,t4
beq t4,bcopy_ov_nostart2
subq a3,t4,a3
subq a4,t4,a4
ldq_u t1,0(a3)
subq SIZEREG,t4,SIZEREG
ldq_u t2,7(a3)
ldq t3,0(a4)
extql t1,a3,t1
extqh t2,a3,t2
or t1,t2,t1
mskqh t3,t4,t3
mskql t1,t4,t1
or t1,t3,t1
stq t1,0(a4)
bcopy_ov_nostart2:
bic SIZEREG,7,t4
and SIZEREG,7,SIZEREG
beq t4,bcopy_ov_lp_end
bcopy_ov_lp:
/* This could be more pipelined, but it doesn't seem worth it */
ldq_u t0,-8(a3)
subq a4,8,a4
ldq_u t1,-1(a3)
subq a3,8,a3
extql t0,a3,t0
extqh t1,a3,t1
subq t4,8,t4
or t0,t1,t0
stq t0,0(a4)
bne t4,bcopy_ov_lp
bcopy_ov_lp_end:
beq SIZEREG,bcopy_done
ldq_u t0,0(SRCREG)
ldq_u t1,7(SRCREG)
ldq_u t2,0(DSTREG)
extql t0,SRCREG,t0
extqh t1,SRCREG,t1
or t0,t1,t0
insql t0,DSTREG,t0
mskql t2,DSTREG,t2
or t2,t0,t2
stq_u t2,0(DSTREG)
bcopy_done:
RET
bcopy_ov_short:
ldq_u t2,0(SRCREG)
br zero,bcopy_da_finish
END(FUNCTION)