Use a better version of memcpy/bcopy for mips kernel.
Use a variant of mips libc memcpy for kernel. This implementation uses 64-bit operations when compiled for 64-bit, and is significantly faster in that case. Submitted by: Tanmay Jagdale <tanmayj@broadcom.com>
This commit is contained in:
parent
58909b74b9
commit
cbd49bff46
@ -38,6 +38,7 @@ mips/mips/stack_machdep.c optional ddb | stack
|
||||
mips/mips/stdatomic.c standard \
|
||||
compile-with "${NORMAL_C:N-Wmissing-prototypes}"
|
||||
mips/mips/support.S standard
|
||||
mips/mips/bcopy.S standard
|
||||
mips/mips/swtch.S standard
|
||||
mips/mips/sys_machdep.c standard
|
||||
mips/mips/tlb.c standard
|
||||
|
286
sys/mips/mips/bcopy.S
Normal file
286
sys/mips/mips/bcopy.S
Normal file
@ -0,0 +1,286 @@
|
||||
/* $NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $ */
|
||||
|
||||
/*
|
||||
* Mach Operating System
|
||||
* Copyright (c) 1993 Carnegie Mellon University
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and its
|
||||
* documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
|
||||
* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie Mellon
|
||||
* the rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* File: mips_bcopy.s
|
||||
* Author: Chris Maeda
|
||||
* Date: June 1993
|
||||
*
|
||||
* Fast copy routine. Derived from aligned_block_copy.
|
||||
*/
|
||||
|
||||
|
||||
#include <machine/asm.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <machine/endian.h>
|
||||
|
||||
#if defined(LIBC_SCCS) && !defined(lint)
|
||||
#if 0
|
||||
ASMSTR("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93")
|
||||
#else
|
||||
ASMSTR("$NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $")
|
||||
#endif
|
||||
#endif /* LIBC_SCCS and not lint */
|
||||
|
||||
#ifdef __ABICALLS__
|
||||
.abicalls
|
||||
#endif
|
||||
|
||||
/*
|
||||
* bcopy(caddr_t src, caddr_t dst, unsigned int len)
|
||||
*
|
||||
* a0 src address
|
||||
* a1 dst address
|
||||
* a2 length
|
||||
*/
|
||||
|
||||
#define SRCREG a0
|
||||
#define DSTREG a1
|
||||
#define SIZEREG a2
|
||||
|
||||
LEAF(memcpy)
|
||||
.set noat
|
||||
.set noreorder
|
||||
|
||||
/*
 * memcpy entry: swap a0 and a1 through v0 so that the code from the
 * bcopy label onward sees SRCREG (a0) and DSTREG (a1). v0 keeps the
 * incoming a0 and is not written again before the return.
 *
 * .set noreorder is in effect for the whole routine: the instruction
 * written after each branch is that branch's delay slot.
 */
move v0, a0 # v0 = incoming a0
|
||||
move a0, a1 # a0 = incoming a1
|
||||
move a1, v0 # a1 = incoming a0
|
||||
|
||||
ALEAF(bcopy)
|
||||
ALEAF(ovbcopy)
|
||||
/*
|
||||
* Make sure we can copy forwards; if src < dst, copy backwards (label 6) instead.
|
||||
*/
|
||||
sltu t0,SRCREG,DSTREG # t0 == SRCREG < DSTREG
|
||||
bne t0,zero,6f # copy backwards
|
||||
|
||||
/*
|
||||
* There are four alignment cases (with frequency)
|
||||
* (Based on measurements taken with a DECstation 5000/200
|
||||
* inside a Mach kernel.)
|
||||
*
|
||||
* aligned -> aligned (mostly)
|
||||
* unaligned -> aligned (sometimes)
|
||||
* aligned,unaligned -> unaligned (almost never)
|
||||
*
|
||||
* Note that we could add another case that checks if
|
||||
* the destination and source are unaligned but the
|
||||
* copy is alignable. eg if src and dest are both
|
||||
* on a halfword boundary.
|
||||
*/
|
||||
andi t1,DSTREG,(SZREG-1) # get last bits of dest (also the delay slot of the bne above)
|
||||
bne t1,zero,3f # dest unaligned: byte copy
|
||||
andi t0,SRCREG,(SZREG-1) # (delay slot) get last bits of src
|
||||
bne t0,zero,5f # src unaligned, dest aligned
|
||||
|
||||
/*
|
||||
* Forward aligned->aligned copy, 8 registers (8*SZREG bytes) at a time.
|
||||
*/
|
||||
98:
|
||||
li AT,-(SZREG*8) # mask for multiples of 8*SZREG (also the delay slot of the bne above)
|
||||
and t0,SIZEREG,AT # count truncated to multiples
|
||||
PTR_ADDU a3,SRCREG,t0 # run fast loop up to this addr
|
||||
sltu AT,SRCREG,a3 # any work to do?
|
||||
beq AT,zero,2f
|
||||
PTR_SUBU SIZEREG,t0 # (delay slot) bytes left after the fast loop
|
||||
|
||||
/*
|
||||
* loop body
|
||||
*/
|
||||
1: # cp
|
||||
REG_L t3,(0*SZREG)(SRCREG)
|
||||
REG_L v1,(1*SZREG)(SRCREG)
|
||||
REG_L t0,(2*SZREG)(SRCREG)
|
||||
REG_L t1,(3*SZREG)(SRCREG)
|
||||
PTR_ADDU SRCREG,SZREG*8 # advance src; the next four loads use negative offsets
|
||||
REG_S t3,(0*SZREG)(DSTREG)
|
||||
REG_S v1,(1*SZREG)(DSTREG)
|
||||
REG_S t0,(2*SZREG)(DSTREG)
|
||||
REG_S t1,(3*SZREG)(DSTREG)
|
||||
REG_L t1,(-1*SZREG)(SRCREG)
|
||||
REG_L t0,(-2*SZREG)(SRCREG)
|
||||
REG_L v1,(-3*SZREG)(SRCREG)
|
||||
REG_L t3,(-4*SZREG)(SRCREG)
|
||||
PTR_ADDU DSTREG,SZREG*8 # advance dst; the next four stores use negative offsets
|
||||
REG_S t1,(-1*SZREG)(DSTREG)
|
||||
REG_S t0,(-2*SZREG)(DSTREG)
|
||||
REG_S v1,(-3*SZREG)(DSTREG)
|
||||
bne SRCREG,a3,1b
|
||||
REG_S t3,(-4*SZREG)(DSTREG) # (delay slot) last store of the iteration
|
||||
|
||||
/*
|
||||
* Copy a word at a time, no loop unrolling.
|
||||
*/
|
||||
2: # wordcopy
|
||||
andi t2,SIZEREG,(SZREG-1) # t2 = byte count mod SZREG
|
||||
PTR_SUBU t2,SIZEREG,t2 # t2 = words to copy * SZREG
|
||||
beq t2,zero,3f
|
||||
PTR_ADDU t0,SRCREG,t2 # (delay slot) stop at t0
|
||||
PTR_SUBU SIZEREG,SIZEREG,t2
|
||||
1:
|
||||
REG_L t3,0(SRCREG)
|
||||
PTR_ADDU SRCREG,SZREG
|
||||
REG_S t3,0(DSTREG)
|
||||
bne SRCREG,t0,1b
|
||||
PTR_ADDU DSTREG,SZREG # (delay slot)
|
||||
|
||||
3: # bytecopy
|
||||
beq SIZEREG,zero,4f # nothing left to do?
|
||||
nop
|
||||
1:
|
||||
lb t3,0(SRCREG)
|
||||
PTR_ADDU SRCREG,1
|
||||
sb t3,0(DSTREG)
|
||||
PTR_SUBU SIZEREG,1
|
||||
bgtz SIZEREG,1b
|
||||
PTR_ADDU DSTREG,1 # (delay slot)
|
||||
|
||||
4: # copydone
|
||||
j ra
|
||||
nop
|
||||
|
||||
/*
|
||||
* Copy from unaligned source to aligned dest.
|
||||
*/
|
||||
5: # destaligned
|
||||
andi t0,SIZEREG,(SZREG-1) # t0 = bytecount mod SZREG
|
||||
PTR_SUBU a3,SIZEREG,t0 # a3 = bytes to transfer as whole registers
|
||||
beq a3,zero,3b
|
||||
nop
|
||||
move SIZEREG,t0 # this many to do after we are done
|
||||
PTR_ADDU a3,SRCREG,a3 # stop point
|
||||
|
||||
1:
|
||||
REG_LHI t3,0(SRCREG) # REG_LHI/REG_LLO are defined outside this file; presumably the unaligned-load pair -- confirm in machine/asm.h
|
||||
REG_LLO t3,SZREG-1(SRCREG)
|
||||
PTR_ADDI SRCREG,SZREG
|
||||
REG_S t3,0(DSTREG)
|
||||
bne SRCREG,a3,1b
|
||||
PTR_ADDI DSTREG,SZREG # (delay slot)
|
||||
|
||||
b 3b # finish the tail with the byte loop
|
||||
nop
|
||||
|
||||
6: # backcopy -- based on above
|
||||
PTR_ADDU SRCREG,SIZEREG # SRCREG = one past the end of src
|
||||
PTR_ADDU DSTREG,SIZEREG # DSTREG = one past the end of dst
|
||||
andi t1,DSTREG,SZREG-1 # get low (SZREG-1 mask) bits of dest end
|
||||
bne t1,zero,3f # dest end unaligned: byte copy
|
||||
andi t0,SRCREG,SZREG-1 # (delay slot) get low (SZREG-1 mask) bits of src end
|
||||
bne t0,zero,5f # src end unaligned, dest end aligned
|
||||
|
||||
/*
|
||||
* Backward aligned->aligned copy, 8*SZREG bytes at a time.
|
||||
*/
|
||||
li AT,(-8*SZREG) # mask for multiples of 8*SZREG (also the delay slot of the bne above)
|
||||
and t0,SIZEREG,AT # count truncated to multiple of 8*SZREG
|
||||
beq t0,zero,2f # any work to do?
|
||||
PTR_SUBU SIZEREG,t0 # (delay slot) bytes left after the fast loop
|
||||
PTR_SUBU a3,SRCREG,t0 # run fast loop down to this addr
|
||||
|
||||
/*
|
||||
* loop body
|
||||
*/
|
||||
1: # cp
|
||||
REG_L t3,(-4*SZREG)(SRCREG)
|
||||
REG_L v1,(-3*SZREG)(SRCREG)
|
||||
REG_L t0,(-2*SZREG)(SRCREG)
|
||||
REG_L t1,(-1*SZREG)(SRCREG)
|
||||
PTR_SUBU SRCREG,8*SZREG # step src down; the next four loads use positive offsets
|
||||
REG_S t3,(-4*SZREG)(DSTREG)
|
||||
REG_S v1,(-3*SZREG)(DSTREG)
|
||||
REG_S t0,(-2*SZREG)(DSTREG)
|
||||
REG_S t1,(-1*SZREG)(DSTREG)
|
||||
REG_L t1,(3*SZREG)(SRCREG)
|
||||
REG_L t0,(2*SZREG)(SRCREG)
|
||||
REG_L v1,(1*SZREG)(SRCREG)
|
||||
REG_L t3,(0*SZREG)(SRCREG)
|
||||
PTR_SUBU DSTREG,8*SZREG # step dst down; the next four stores use positive offsets
|
||||
REG_S t1,(3*SZREG)(DSTREG)
|
||||
REG_S t0,(2*SZREG)(DSTREG)
|
||||
REG_S v1,(1*SZREG)(DSTREG)
|
||||
bne SRCREG,a3,1b
|
||||
REG_S t3,(0*SZREG)(DSTREG) # (delay slot) last store of the iteration
|
||||
|
||||
/*
|
||||
* Copy a word at a time, no loop unrolling.
|
||||
*/
|
||||
2: # wordcopy
|
||||
andi t2,SIZEREG,SZREG-1 # t2 = byte count mod SZREG
|
||||
PTR_SUBU t2,SIZEREG,t2 # t2 = bytes to copy as whole registers
|
||||
beq t2,zero,3f
|
||||
PTR_SUBU t0,SRCREG,t2 # (delay slot) stop at t0
|
||||
PTR_SUBU SIZEREG,SIZEREG,t2
|
||||
1:
|
||||
REG_L t3,-SZREG(SRCREG)
|
||||
PTR_SUBU SRCREG,SZREG
|
||||
REG_S t3,-SZREG(DSTREG)
|
||||
bne SRCREG,t0,1b
|
||||
PTR_SUBU DSTREG,SZREG # (delay slot)
|
||||
|
||||
3: # bytecopy
|
||||
beq SIZEREG,zero,4f # nothing left to do?
|
||||
nop
|
||||
1:
|
||||
lb t3,-1(SRCREG)
|
||||
PTR_SUBU SRCREG,1
|
||||
sb t3,-1(DSTREG)
|
||||
PTR_SUBU SIZEREG,1
|
||||
bgtz SIZEREG,1b
|
||||
PTR_SUBU DSTREG,1 # (delay slot)
|
||||
|
||||
4: # copydone
|
||||
j ra
|
||||
nop
|
||||
|
||||
/*
|
||||
* Copy backwards from unaligned source to aligned dest.
|
||||
*/
|
||||
5: # destaligned
|
||||
andi t0,SIZEREG,SZREG-1 # t0 = bytecount mod SZREG
|
||||
PTR_SUBU a3,SIZEREG,t0 # a3 = bytes to transfer as whole registers
|
||||
beq a3,zero,3b
|
||||
nop
|
||||
move SIZEREG,t0 # this many to do after we are done
|
||||
PTR_SUBU a3,SRCREG,a3 # stop point
|
||||
|
||||
1:
|
||||
REG_LHI t3,-SZREG(SRCREG) # REG_LHI/REG_LLO are defined outside this file; presumably the unaligned-load pair -- confirm in machine/asm.h
|
||||
REG_LLO t3,-1(SRCREG)
|
||||
PTR_SUBU SRCREG,SZREG
|
||||
REG_S t3,-SZREG(DSTREG)
|
||||
bne SRCREG,a3,1b
|
||||
PTR_SUBU DSTREG,SZREG # (delay slot)
|
||||
|
||||
b 3b # finish the tail with the backward byte loop
|
||||
nop
|
||||
|
||||
.set reorder
|
||||
.set at
|
||||
END(memcpy)
|
@ -506,98 +506,6 @@ LEAF(fswintrberr)
|
||||
li v0, -1
|
||||
END(fswintrberr)
|
||||
|
||||
/*
|
||||
* memcpy(to, from, len)
|
||||
* {ov}bcopy(from, to, len)
|
||||
*/
|
||||
LEAF(memcpy)
|
||||
.set noreorder
|
||||
move v0, a0 # swap from and to; v0 keeps the incoming a0
|
||||
move a0, a1
|
||||
move a1, v0
|
||||
ALEAF(bcopy)
|
||||
ALEAF(ovbcopy)
|
||||
.set noreorder
|
||||
PTR_ADDU t0, a0, a2 # t0 = end of from region
|
||||
sltu t1, a1, t0
|
||||
sltu t2, a0, a1
|
||||
and t1, t1, t2 # t1 = true if from < to < (from+len)
|
||||
beq t1, zero, forward # non overlapping, do forward copy
|
||||
slt t2, a2, 12 # check for small copy (delay slot; t2 is tested at forward:)
|
||||
|
||||
ble a2, zero, 2f
|
||||
PTR_ADDU t1, a1, a2 # t1 = end of to region
|
||||
1:
|
||||
lb v1, -1(t0) # copy bytes backwards,
|
||||
PTR_SUBU t0, t0, 1 # doesn't happen often so do slow way
|
||||
PTR_SUBU t1, t1, 1
|
||||
bne t0, a0, 1b
|
||||
sb v1, 0(t1) # (delay slot) store the byte loaded above
|
||||
2:
|
||||
j ra
|
||||
nop
|
||||
forward:
|
||||
bne t2, zero, smallcpy # do a small bcopy
|
||||
xor v1, a0, a1 # compare low two bits of addresses
|
||||
and v1, v1, 3
|
||||
PTR_SUBU a3, zero, a1 # compute # bytes to word align address
|
||||
beq v1, zero, aligned # addresses can be word aligned
|
||||
and a3, a3, 3 # (delay slot) a3 = (-to) & 3
|
||||
|
||||
beq a3, zero, 1f
|
||||
PTR_SUBU a2, a2, a3 # subtract from remaining count
|
||||
LWHI v1, 0(a0) # get next 4 bytes (unaligned)
|
||||
LWLO v1, 3(a0)
|
||||
PTR_ADDU a0, a0, a3
|
||||
SWHI v1, 0(a1) # store 1, 2, or 3 bytes to align a1
|
||||
PTR_ADDU a1, a1, a3
|
||||
1:
|
||||
and v1, a2, 3 # compute number of words left
|
||||
PTR_SUBU a3, a2, v1
|
||||
move a2, v1
|
||||
PTR_ADDU a3, a3, a0 # compute ending address
|
||||
2:
|
||||
LWHI v1, 0(a0) # copy words a0 unaligned, a1 aligned
|
||||
LWLO v1, 3(a0)
|
||||
PTR_ADDU a0, a0, 4
|
||||
sw v1, 0(a1)
|
||||
PTR_ADDU a1, a1, 4
|
||||
bne a0, a3, 2b
|
||||
nop # We have to do this mmu-bug.
|
||||
b smallcpy
|
||||
nop
|
||||
aligned:
|
||||
beq a3, zero, 1f
|
||||
PTR_SUBU a2, a2, a3 # subtract from remaining count
|
||||
LWHI v1, 0(a0) # copy 1, 2, or 3 bytes to align
|
||||
PTR_ADDU a0, a0, a3
|
||||
SWHI v1, 0(a1)
|
||||
PTR_ADDU a1, a1, a3
|
||||
1:
|
||||
and v1, a2, 3 # compute number of whole words left
|
||||
PTR_SUBU a3, a2, v1
|
||||
move a2, v1
|
||||
PTR_ADDU a3, a3, a0 # compute ending address
|
||||
2:
|
||||
lw v1, 0(a0) # copy words
|
||||
PTR_ADDU a0, a0, 4
|
||||
sw v1, 0(a1)
|
||||
bne a0, a3, 2b
|
||||
PTR_ADDU a1, a1, 4 # (delay slot)
|
||||
smallcpy:
|
||||
ble a2, zero, 2f
|
||||
PTR_ADDU a3, a2, a0 # compute ending address
|
||||
1:
|
||||
lbu v1, 0(a0) # copy bytes
|
||||
PTR_ADDU a0, a0, 1
|
||||
sb v1, 0(a1)
|
||||
bne a0, a3, 1b
|
||||
PTR_ADDU a1, a1, 1 # MMU BUG ? can not do -1(a1) at 0x80000000!!
|
||||
2:
|
||||
j ra
|
||||
nop
|
||||
END(memcpy)
|
||||
|
||||
/*
|
||||
* memset(void *s1, int c, int len)
|
||||
* NetBSD: memset.S,v 1.3 2001/10/16 15:40:53 uch Exp
|
||||
|
Loading…
Reference in New Issue
Block a user