_ARM_ARCH_5E is always defined; we do not support older CPUs.
This commit is contained in:
parent
c5426ce3a6
commit
f72366f927
@ -44,145 +44,6 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#include "assym.inc"
|
||||
|
||||
#ifndef _ARM_ARCH_5E
|
||||
|
||||
/* #define BIG_LOOPS */
|
||||
|
||||
/*
 * bcopy_page(src, dest)
 *
 * Copy one page of memory, moving 32 bytes (eight registers) per
 * load/store pair.
 *
 * Arguments:
 *   r0 - source address
 *   r1 - destination address
 *
 * Register use:
 *   r2            - count of loop passes still to run
 *   r3-r8, ip, lr - staging registers for each 32-byte chunk
 *
 * Constraint:
 *   PAGE_SIZE must be a multiple of the bytes moved per loop pass:
 *   512 when BIG_LOOPS is defined, 128 otherwise.
 */

#define	CHUNK_SIZE	32

/* Prefetch hooks; both expand to nothing here. */
#define	PREFETCH_FIRST_CHUNK	/* nothing */
#define	PREFETCH_NEXT_CHUNK	/* nothing */

/* Move one 32-byte chunk from [r0] to [r1], post-incrementing both. */
#ifndef COPY_CHUNK
#define	COPY_CHUNK \
	PREFETCH_NEXT_CHUNK ; \
	ldmia	r0!, {r3-r8,ip,lr} ; \
	stmia	r1!, {r3-r8,ip,lr}
#endif /* ! COPY_CHUNK */

/* Callee-saved registers used as staging space; the restore also returns. */
#ifndef SAVE_REGS
#define	SAVE_REGS	stmfd	sp!, {r4-r8, lr}; _SAVE({r4-r8, lr})
#define	RESTORE_REGS	ldmfd	sp!, {r4-r8, pc}
#endif

ENTRY(bcopy_page)
	PREFETCH_FIRST_CHUNK
	SAVE_REGS

	/* r2 = number of loop passes needed to cover the page. */
#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)	/* 16 chunks = 512 bytes per pass */
#else
	mov	r2, #(PAGE_SIZE >> 7)	/* 4 chunks = 128 bytes per pass */
#endif

1:
	/* Four chunks are copied on every pass ... */
	.rept	4
	COPY_CHUNK
	.endr

#ifdef BIG_LOOPS
	/*
	 * ... plus twelve more with BIG_LOOPS.  There is little point
	 * making the loop any larger; unless we are running with the
	 * cache off, the load/store overheads will completely dominate
	 * this loop.
	 */
	.rept	12
	COPY_CHUNK
	.endr
#endif
	subs	r2, r2, #1
	bne	1b

	RESTORE_REGS		/* pops the saved lr into pc: return */
END(bcopy_page)
|
||||
|
||||
/*
 * bzero_page(dest)
 *
 * Fill one page with zeroes, storing 32 bytes (eight zeroed registers)
 * per stmia.
 *
 * Arguments:
 *   r0 - destination address
 *
 * Register use:
 *   r2            - count of loop passes still to run
 *   r3-r8, ip, lr - all hold zero and are stored as a group
 *
 * Constraint:
 *   PAGE_SIZE must be a multiple of the bytes stored per loop pass:
 *   512 when BIG_LOOPS is defined, 128 otherwise.
 */

ENTRY(bzero_page)
	stmfd	sp!, {r4-r8, lr}
	_SAVE({r4-r8, lr})

	/* Clear the eight registers that form each 32-byte store. */
	mov	r3, #0
	mov	r4, #0
	mov	r5, #0
	mov	r6, #0
	mov	r7, #0
	mov	r8, #0
	mov	ip, #0
	mov	lr, #0

	/* r2 = number of loop passes needed to cover the page. */
#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)	/* 16 stores = 512 bytes per pass */
#else
	mov	r2, #(PAGE_SIZE >> 7)	/* 4 stores = 128 bytes per pass */
#endif

1:
	/* Four 32-byte stores happen on every pass ... */
	.rept	4
	stmia	r0!, {r3-r8,ip,lr}
	.endr

#ifdef BIG_LOOPS
	/*
	 * ... plus twelve more with BIG_LOOPS.  There is little point
	 * making the loop any larger; unless we are running with the
	 * cache off, the load/store overheads will completely dominate
	 * this loop.
	 */
	.rept	12
	stmia	r0!, {r3-r8,ip,lr}
	.endr
#endif

	subs	r2, r2, #1
	bne	1b

	ldmfd	sp!, {r4-r8, pc}	/* restore and return */
END(bzero_page)
|
||||
|
||||
#else /* _ARM_ARCH_5E */
|
||||
|
||||
/*
|
||||
* armv5e version of bcopy_page
|
||||
@ -279,4 +140,3 @@ ENTRY(bzero_page)
|
||||
bne 1b
|
||||
RET
|
||||
END(bzero_page)
|
||||
#endif /* _ARM_ARCH_5E */
|
||||
|
@ -47,510 +47,7 @@
|
||||
.word _C_LABEL(_min_memcpy_size)
|
||||
|
||||
__FBSDID("$FreeBSD$");
|
||||
#ifdef _ARM_ARCH_5E
|
||||
#include <arm/arm/bcopyinout_xscale.S>
|
||||
#else
|
||||
|
||||
/*
 * Section setup and helper macros used by copyin() and copyout() below.
 */
	.text
	.align	2

/*
 * GET_PCB(tmp): read CP15 register c13, c0, 4 into tmp and add TD_PCB.
 * Callers then load through tmp to obtain the PCB pointer.
 * NOTE(review): presumably that CP15 register holds the current thread
 * pointer - confirm against the CPU setup code.
 */
#define GET_PCB(tmp) \
	mrc p15, 0, tmp, c13, c0, 4; \
	add	tmp, tmp, #(TD_PCB)

/* Save/restore the callee-saved registers the copy loops use as scratch. */
#define SAVE_REGS	stmfd	sp!, {r4-r11}; _SAVE({r4-r11})
#define RESTORE_REGS	ldmfd	sp!, {r4-r11}

/*
 * PREFETCH(rx, o) is a no-op in this file.  This code is only assembled
 * when _ARM_ARCH_5E is undefined (it sits in the #else arm of the
 * enclosing "#ifdef _ARM_ARCH_5E"), so the former pld-based variant
 * guarded by "#if defined(_ARM_ARCH_5E)" could never be selected and
 * has been dropped.
 */
#define PREFETCH(rx,o)
|
||||
|
||||
/*
 * copyin
 *
 * r0 = user space address (source)
 * r1 = kernel space address (destination)
 * r2 = length
 *
 * Copies bytes from user space to kernel space.
 *
 * Returns 0 in r0 on success, or EFAULT if the length is non-zero and
 * either r0 + r2 wraps around or reaches VM_MAXUSER_ADDRESS + 1 or
 * beyond.  EFAULT is also returned through .Lcopyfault.
 * NOTE(review): .Lcopyfault is installed in the PCB onfault slot, so it
 * is presumably entered by the fault handler when a user access faults -
 * confirm against the abort handler.
 *
 * r4-r11 are saved/restored and used as scratch:
 *   r4 = PCB pointer, r5 = previous PCB_ONFAULT value, r6-r11 = data.
 * All user-side loads use the ldrt/ldrbt forms.
 *
 * .Lcopyfault is also referenced by copyout().
 */
ENTRY(copyin)
	/* Quick exit if length is zero */
	teq	r2, #0
	moveq	r0, #0
	RETeq

	/* Reject a range whose end address wraps around. */
	adds	r3, r0, r2
	movcs	r0, #EFAULT
	RETc(cs)

	/* Reject a range that ends above the user address space. */
	ldr	r12, =(VM_MAXUSER_ADDRESS + 1)
	cmp	r3, r12
	movcs	r0, #EFAULT
	RETc(cs)

	/*
	 * If an _arm_memcpy hook is installed and the request is at least
	 * _min_memcpy_size bytes, try the hook first.  It is called as
	 * hook(dst, src, len, flags), so r0 and r1 are swapped for the call.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, r0
	mov	r0, r1
	mov	r1, r3
	mov	r3, #2 /* SRC_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc
	ldr	pc, [r4]
	cmp	r0, #0			/* flags survive the ldmfd below */
	ldmfd	sp!, {r0-r2, r4, lr}
	moveq	r0, #0
	RETeq
	/* Hook returned non-zero: fall back to the generic copy. */

.Lnormal:
	SAVE_REGS
	GET_PCB(r4)
	ldr	r4, [r4]

	/* Remember the old onfault handler in r5 and install ours. */
	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	PREFETCH(r0, 0)
	PREFETCH(r1, 0)

	/*
	 * If not too many bytes, take the slow path.
	 */
	cmp	r2, #0x08
	blt	.Licleanup

	/*
	 * Align destination to word boundary.
	 * The ldr reads pc as the address of the first .word below, so the
	 * table is indexed by (r1 & 3); the "b" only pads the gap.
	 */
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lialend
	.word	.Lialend
	.word	.Lial3
	.word	.Lial2
	.word	.Lial1
.Lial3:
	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lial2:
	ldrbt	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lial1:
	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lialend:

	/*
	 * If few bytes left, finish slow.
	 */
	cmp	r2, #0x08
	blt	.Licleanup

	/*
	 * If source is not aligned, finish slow.
	 */
	ands	r3, r0, #0x03
	bne	.Licleanup

	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
	blt	.Licleanup8

	/*
	 * Align destination to cacheline boundary.
	 * If source and destination are nicely aligned, this can be a big
	 * win.  If not, it's still cheaper to copy in groups of 32 even if
	 * we don't get the nice cacheline alignment.
	 *
	 * r1 is word aligned here, so (r1 & 0x1f) is already a byte offset
	 * into the word table and needs no scaling.
	 */
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Licaligned
	.word	.Licaligned
	.word	.Lical28
	.word	.Lical24
	.word	.Lical20
	.word	.Lical16
	.word	.Lical12
	.word	.Lical8
	.word	.Lical4
.Lical28:
	ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical24:
	ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical20:
	ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical16:
	ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical12:
	ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical8:
	ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical4:
	ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4

	/*
	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
	 * part of the code, and we may have knocked that down by as much
	 * as 0x1c getting aligned).
	 *
	 * This loop basically works out to:
	 * do {
	 * 	prefetch-next-cacheline(s)
	 *	bytes -= 0x20;
	 *	copy cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy cacheline
	 */
.Licaligned:
	PREFETCH(r0, 32)
	PREFETCH(r1, 32)

	sub	r2, r2, #0x20

	/* Copy a cacheline: 2 words stored first, then the other 6. */
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	ldrt	r6, [r0], #4
	ldrt	r7, [r0], #4
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	stmia	r1!, {r10-r11}
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	stmia	r1!, {r6-r11}

	cmp	r2, #0x40
	bge	.Licaligned

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	ldrt	r6, [r0], #4
	ldrt	r7, [r0], #4
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	stmia	r1!, {r10-r11}
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	stmia	r1!, {r6-r11}

	cmp	r2, #0x08
	blt	.Liprecleanup

	/* Copy 8 bytes at a time while at least 8 remain. */
.Licleanup8:
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	sub	r2, r2, #8
	stmia	r1!, {r8, r9}
	cmp	r2, #8
	bge	.Licleanup8

.Liprecleanup:
	/*
	 * If we're done, bail.
	 * Use copyin's own exit label; this used to branch to .Lout, the
	 * instruction-for-instruction identical epilogue inside copyout().
	 */
	cmp	r2, #0
	beq	.Liout

	/*
	 * Byte-at-a-time tail.  The table is indexed by (r2 & 3): index 0
	 * copies four bytes, otherwise that many bytes.  Only the final
	 * "subs" sets the flags tested at .Licend, so the loop repeats
	 * until r2 reaches zero.
	 */
.Licleanup:
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Licend
	.word	.Lic4
	.word	.Lic1
	.word	.Lic2
	.word	.Lic3
.Lic4:
	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lic3:
	ldrbt	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lic2:
	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lic1:
	ldrbt	r7, [r0], #1
	subs	r2, r2, #1
	strb	r7, [r1], #1
.Licend:
	bne	.Licleanup

	/* Success: return 0 and put back the previous onfault handler. */
.Liout:
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET

	/* Failure exit (shared with copyout): return EFAULT. */
.Lcopyfault:
	ldr	r0, =EFAULT
	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET
END(copyin)
|
||||
|
||||
/*
 * copyout
 *
 * r0 = kernel space address (source)
 * r1 = user space address (destination)
 * r2 = length
 *
 * Copies bytes from kernel space to user space.
 *
 * Returns 0 in r0 on success, or EFAULT if the length is non-zero and
 * either r1 + r2 wraps around or reaches VM_MAXUSER_ADDRESS + 1 or
 * beyond.  The onfault handler installed here is .Lcopyfault, which is
 * defined in copyin().
 *
 * r4-r11 are saved/restored and used as scratch:
 *   r4 = PCB pointer, r5 = previous PCB_ONFAULT value, r6-r11 = data.
 * All user-side stores use the strt/strbt forms.
 */

ENTRY(copyout)
	/* Nothing to do for a zero length. */
	teq	r2, #0
	moveq	r0, #0
	RETeq

	/* Fail if the end of the user range wraps around. */
	adds	r3, r1, r2
	movcs	r0, #EFAULT
	RETc(cs)

	/* Fail if the user range ends above the user address space. */
	ldr	r12, =(VM_MAXUSER_ADDRESS + 1)
	cmp	r3, r12
	movcs	r0, #EFAULT
	RETc(cs)

	/*
	 * Try the optional _arm_memcpy hook when it is installed and the
	 * request is at least _min_memcpy_size bytes.  The hook takes
	 * (dst, src, len, flags), hence the r0/r1 swap.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormale
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormale
	stmfd	sp!, {r0-r2, r4, lr}
	_SAVE({r0-r2, r4, lr})
	mov	r3, r0
	mov	r0, r1
	mov	r1, r3
	mov	r3, #1 /* DST_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc
	ldr	pc, [r4]
	cmp	r0, #0			/* result is tested after the pop */
	ldmfd	sp!, {r0-r2, r4, lr}
	moveq	r0, #0
	RETeq
	/* A non-zero result drops into the generic copy below. */

.Lnormale:
	SAVE_REGS
	GET_PCB(r4)
	ldr	r4, [r4]

	/* r5 keeps the previous onfault value while ours is installed. */
	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	PREFETCH(r0, 0)
	PREFETCH(r1, 0)

	/* Fewer than 8 bytes: go straight to the byte loop. */
	cmp	r2, #0x08
	blt	.Lcleanup

	/*
	 * Bring the destination up to a word boundary.  The table is
	 * indexed by (r1 & 3); pc reads as the address of the first
	 * .word, and the "b" is only padding.
	 */
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lalend
	.word	.Lalend
	.word	.Lal3
	.word	.Lal2
	.word	.Lal1
.Lal3:
	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lal2:
	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strbt	r7, [r1], #1
.Lal1:
	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lalend:

	/* Fewer than 8 bytes left after aligning: byte loop. */
	cmp	r2, #0x08
	blt	.Lcleanup

	/* A source that is not word aligned also takes the byte loop. */
	ands	r3, r0, #0x03
	bne	.Lcleanup

	cmp	r2, #0x60	/* need >= 0x60 for the unrolled cacheline code */
	blt	.Lcleanup8

	/*
	 * Step the destination up to a 32-byte boundary, one word at a
	 * time.  (r1 & 0x1f) is a multiple of four here, so it indexes the
	 * word table directly.
	 */
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Lcaligned
	.word	.Lcaligned
	.word	.Lcal28
	.word	.Lcal24
	.word	.Lcal20
	.word	.Lcal16
	.word	.Lcal12
	.word	.Lcal8
	.word	.Lcal4
.Lcal28:
	ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal24:
	ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal20:
	ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal16:
	ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal12:
	ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal8:
	ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal4:
	ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4

	/*
	 * More than 0x40 bytes remain at this point (at least 0x60 were
	 * required above and at most 0x1c were used for alignment).
	 *
	 * The code below amounts to:
	 * do {
	 *	prefetch the next cacheline(s)
	 *	bytes -= 0x20;
	 *	copy one cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy one cacheline
	 */
.Lcaligned:
	PREFETCH(r0, 32)
	PREFETCH(r1, 32)

	sub	r2, r2, #0x20

	/* One cacheline: load 6 words, store 2, load 2 more, store 6. */
	ldmia	r0!, {r6-r11}
	strt	r6, [r1], #4
	strt	r7, [r1], #4
	ldmia	r0!, {r6-r7}
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	strt	r10, [r1], #4
	strt	r11, [r1], #4
	strt	r6, [r1], #4
	strt	r7, [r1], #4

	cmp	r2, #0x40
	bge	.Lcaligned

	sub	r2, r2, #0x20

	/* Final full cacheline, same sequence as above. */
	ldmia	r0!, {r6-r11}
	strt	r6, [r1], #4
	strt	r7, [r1], #4
	ldmia	r0!, {r6-r7}
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	strt	r10, [r1], #4
	strt	r11, [r1], #4
	strt	r6, [r1], #4
	strt	r7, [r1], #4

	cmp	r2, #0x08
	blt	.Lprecleanup

	/* Two words per pass while at least 8 bytes remain. */
.Lcleanup8:
	ldmia	r0!, {r8-r9}
	sub	r2, r2, #8
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	cmp	r2, #8
	bge	.Lcleanup8

.Lprecleanup:
	/* Finished if nothing is left. */
	cmp	r2, #0
	beq	.Lout

	/*
	 * Byte tail, dispatched on (r2 & 3): index 0 copies four bytes,
	 * otherwise that many.  Only the last "subs" sets the flags that
	 * .Lcend tests, so this repeats until r2 is zero.
	 */
.Lcleanup:
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lcend
	.word	.Lc4
	.word	.Lc1
	.word	.Lc2
	.word	.Lc3
.Lc4:
	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lc3:
	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strbt	r7, [r1], #1
.Lc2:
	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lc1:
	ldrb	r7, [r0], #1
	subs	r2, r2, #1
	strbt	r7, [r1], #1
.Lcend:
	bne	.Lcleanup

	/* Success: return 0 with the previous onfault handler restored. */
.Lout:
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET
END(copyout)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* int badaddr_read_1(const uint8_t *src, uint8_t *dest)
|
||||
|
@ -116,9 +116,7 @@ END(do_cksum)
|
||||
*/
|
||||
/* LINTSTUB: Ignore */
|
||||
ASENTRY_NP(L_cksumdata)
|
||||
#ifdef _ARM_ARCH_5E
|
||||
pld [r0] /* Pre-fetch the start of the buffer */
|
||||
#endif
|
||||
mov r2, #0
|
||||
|
||||
/* We first have to word-align the buffer. */
|
||||
@ -144,7 +142,6 @@ ASENTRY_NP(L_cksumdata)
|
||||
|
||||
/* Buffer is now word aligned */
|
||||
.Lcksumdata_wordaligned:
|
||||
#ifdef _ARM_ARCH_5E
|
||||
cmp r1, #0x04 /* Less than 4 bytes left? */
|
||||
blt .Lcksumdata_endgame /* Yup */
|
||||
|
||||
@ -199,43 +196,10 @@ ASENTRY_NP(L_cksumdata)
|
||||
adcs r2, r2, r7
|
||||
adc r2, r2, #0x00
|
||||
|
||||
#else /* !_ARM_ARCH_5E */
|
||||
|
||||
subs r1, r1, #0x40
|
||||
blt .Lcksumdata_bigloop_end
|
||||
|
||||
.Lcksumdata_bigloop:
|
||||
ldmia r0!, {r3, r4, r5, r6}
|
||||
adds r2, r2, r3
|
||||
adcs r2, r2, r4
|
||||
adcs r2, r2, r5
|
||||
ldmia r0!, {r3, r4, r5, r7}
|
||||
adcs r2, r2, r6
|
||||
adcs r2, r2, r3
|
||||
adcs r2, r2, r4
|
||||
adcs r2, r2, r5
|
||||
ldmia r0!, {r3, r4, r5, r6}
|
||||
adcs r2, r2, r7
|
||||
adcs r2, r2, r3
|
||||
adcs r2, r2, r4
|
||||
adcs r2, r2, r5
|
||||
ldmia r0!, {r3, r4, r5, r7}
|
||||
adcs r2, r2, r6
|
||||
adcs r2, r2, r3
|
||||
adcs r2, r2, r4
|
||||
adcs r2, r2, r5
|
||||
adcs r2, r2, r7
|
||||
adc r2, r2, #0x00
|
||||
subs r1, r1, #0x40
|
||||
bge .Lcksumdata_bigloop
|
||||
.Lcksumdata_bigloop_end:
|
||||
#endif
|
||||
|
||||
adds r1, r1, #0x40
|
||||
RETeq
|
||||
cmp r1, #0x20
|
||||
|
||||
#ifdef _ARM_ARCH_5E
|
||||
ldrdge r4, [r0], #0x08 /* Avoid stalling pld and result */
|
||||
blt .Lcksumdata_less_than_32
|
||||
pld [r0, #0x18]
|
||||
@ -250,19 +214,6 @@ ASENTRY_NP(L_cksumdata)
|
||||
adcs r2, r2, r5
|
||||
adcs r2, r2, r6 /* XXX: Unavoidable result stall */
|
||||
adcs r2, r2, r7
|
||||
#else
|
||||
blt .Lcksumdata_less_than_32
|
||||
ldmia r0!, {r3, r4, r5, r6}
|
||||
adds r2, r2, r3
|
||||
adcs r2, r2, r4
|
||||
adcs r2, r2, r5
|
||||
ldmia r0!, {r3, r4, r5, r7}
|
||||
adcs r2, r2, r6
|
||||
adcs r2, r2, r3
|
||||
adcs r2, r2, r4
|
||||
adcs r2, r2, r5
|
||||
adcs r2, r2, r7
|
||||
#endif
|
||||
adc r2, r2, #0x00
|
||||
subs r1, r1, #0x20
|
||||
RETeq
|
||||
|
@ -107,8 +107,8 @@ __FBSDID("$FreeBSD$");
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef _ARM_ARCH_5E
|
||||
#error FreeBSD requires ARMv5 or later
|
||||
#ifndef _ARM_ARCH_6
|
||||
#error FreeBSD requires ARMv6 or later
|
||||
#endif
|
||||
|
||||
struct pcpu __pcpu[MAXCPU];
|
||||
|
@ -149,17 +149,11 @@ do_memset:
|
||||
/* We are now word aligned */
|
||||
.Lmemset_wordaligned:
|
||||
orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */
|
||||
#ifdef _ARM_ARCH_5E
|
||||
tst ip, #0x04 /* Quad-align for armv5e */
|
||||
#else
|
||||
cmp r1, #0x10
|
||||
#endif
|
||||
orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */
|
||||
#ifdef _ARM_ARCH_5E
|
||||
subne r1, r1, #0x04 /* Quad-align if necessary */
|
||||
strne r3, [ip], #0x04
|
||||
cmp r1, #0x10
|
||||
#endif
|
||||
blt .Lmemset_loop4 /* If less than 16 then use words */
|
||||
mov r2, r3 /* Duplicate data */
|
||||
cmp r1, #0x80 /* If < 128 then skip the big loop */
|
||||
@ -168,7 +162,6 @@ do_memset:
|
||||
/* Do 128 bytes at a time */
|
||||
.Lmemset_loop128:
|
||||
subs r1, r1, #0x80
|
||||
#ifdef _ARM_ARCH_5E
|
||||
strdge r2, [ip], #0x08
|
||||
strdge r2, [ip], #0x08
|
||||
strdge r2, [ip], #0x08
|
||||
@ -185,24 +178,6 @@ do_memset:
|
||||
strdge r2, [ip], #0x08
|
||||
strdge r2, [ip], #0x08
|
||||
strdge r2, [ip], #0x08
|
||||
#else
|
||||
stmiage ip!, {r2-r3}
|
||||
stmiage ip!, {r2-r3}
|
||||
stmiage ip!, {r2-r3}
|
||||
stmiage ip!, {r2-r3}
|
||||
stmiage ip!, {r2-r3}
|
||||
stmiage ip!, {r2-r3}
|
||||
stmiage ip!, {r2-r3}
|
||||
stmiage ip!, {r2-r3}
|
||||
stmiage ip!, {r2-r3}
|
||||
stmiage ip!, {r2-r3}
|
||||
stmiage ip!, {r2-r3}
|
||||
stmiage ip!, {r2-r3}
|
||||
stmiage ip!, {r2-r3}
|
||||
stmiage ip!, {r2-r3}
|
||||
stmiage ip!, {r2-r3}
|
||||
stmiage ip!, {r2-r3}
|
||||
#endif
|
||||
bgt .Lmemset_loop128
|
||||
RETeq /* Zero length so just exit */
|
||||
|
||||
@ -211,30 +186,18 @@ do_memset:
|
||||
/* Do 32 bytes at a time */
|
||||
.Lmemset_loop32:
|
||||
subs r1, r1, #0x20
|
||||
#ifdef _ARM_ARCH_5E
|
||||
strdge r2, [ip], #0x08
|
||||
strdge r2, [ip], #0x08
|
||||
strdge r2, [ip], #0x08
|
||||
strdge r2, [ip], #0x08
|
||||
#else
|
||||
stmiage ip!, {r2-r3}
|
||||
stmiage ip!, {r2-r3}
|
||||
stmiage ip!, {r2-r3}
|
||||
stmiage ip!, {r2-r3}
|
||||
#endif
|
||||
bgt .Lmemset_loop32
|
||||
RETeq /* Zero length so just exit */
|
||||
|
||||
adds r1, r1, #0x10 /* Partially adjust for extra sub */
|
||||
|
||||
/* Deal with 16 bytes or more */
|
||||
#ifdef _ARM_ARCH_5E
|
||||
strdge r2, [ip], #0x08
|
||||
strdge r2, [ip], #0x08
|
||||
#else
|
||||
stmiage ip!, {r2-r3}
|
||||
stmiage ip!, {r2-r3}
|
||||
#endif
|
||||
RETeq /* Zero length so just exit */
|
||||
|
||||
addlt r1, r1, #0x10 /* Possibly adjust for extra sub */
|
||||
@ -246,14 +209,10 @@ do_memset:
|
||||
bgt .Lmemset_loop4
|
||||
RETeq /* Zero length so just exit */
|
||||
|
||||
#ifdef _ARM_ARCH_5E
|
||||
/* Compensate for 64-bit alignment check */
|
||||
adds r1, r1, #0x04
|
||||
RETeq
|
||||
cmp r1, #2
|
||||
#else
|
||||
cmp r1, #-2
|
||||
#endif
|
||||
|
||||
strb r3, [ip], #0x01 /* Set 1 byte */
|
||||
strbge r3, [ip], #0x01 /* Set another byte */
|
||||
@ -804,243 +763,6 @@ EENTRY(memmove)
|
||||
EEND(memmove)
|
||||
END(bcopy)
|
||||
|
||||
#if !defined(_ARM_ARCH_5E)
|
||||
/*
 * memcpy variant assembled only when _ARM_ARCH_5E is not defined.
 *
 * r0 = destination, r1 = source, r2 = length in bytes.
 * Returns the original destination address in r0.
 *
 * Throughout the generic path r2 is kept biased below the real remaining
 * count (by 4, 12 or 32 depending on the stage) so that a plain sign
 * test after each "subs" tells whether another unit of that size fits.
 */
ENTRY(memcpy)
	/* Do not consult arm_memcpy if we're running from flash */
#if defined(FLASHADDR) && defined(PHYSADDR)
	ldr	r3, =FLASHADDR
	cmp	r3, pc
#if FLASHADDR > PHYSADDR
	bls	.Lnormal
#else
	bhi	.Lnormal
#endif
#endif
	/*
	 * Use the optional _arm_memcpy hook when it is installed and the
	 * request is at least _min_memcpy_size bytes.  r0-r2 are passed
	 * through unchanged, with r3 = 0 as the flags argument.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, #0
	ldr	r4, .L_arm_memcpy
	mov	lr, pc
	ldr	pc, [r4]
	cmp	r0, #0			/* tested after the pop below */
	ldmfd	sp!, {r0-r2, r4, lr}
	RETeq
	/* Hook returned non-zero: do the copy here instead. */

.Lnormal:
	/* Keep dest for the return value; lr becomes a scratch register. */
	stmdb	sp!, {r0, lr}

	subs	r2, r2, #4
	blt	.Lmemcpy_l4		/* fewer than 4 bytes in total */
	ands	r12, r0, #3
	bne	.Lmemcpy_destul		/* destination not word aligned */
	ands	r12, r1, #3
	bne	.Lmemcpy_srcul		/* source not word aligned */

.Lmemcpy_t8:
	/* Source and destination are both word aligned from here on. */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* fewer than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* fewer than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* Move 32 bytes per pass. */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	cmn	r2, #0x10
	ldmiage	r1!, {r3, r4, r12, lr}	/* one more block of 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* give r4 back */

.Lmemcpy_l32:
	adds	r2, r2, #0x14

	/* Move 12 bytes per pass. */
.Lmemcpy_loop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8
	blt	.Lmemcpy_l4

	/* Either one word or two words are left. */
	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* Fewer than 4 bytes to go; undo the bias to get the real count. */
	adds	r2, r2, #4
	/*
	 * NOTE(review): the guard macro below has a single trailing
	 * underscore; confirm whether "__APCS_26__" was intended.
	 */
#ifdef __APCS_26_
	ldmiaeq sp!, {r0, pc}^		/* done */
#else
	ldmiaeq	sp!, {r0, pc}		/* done */
#endif
	/* Copy the last 1-3 bytes: always one, a second if >= 2, a third if > 2. */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* Destination is not word aligned. */
.Lmemcpy_destul:
	rsb	r12, r12, #4		/* r12 = bytes up to the next word */
	cmp	r12, #2

	/* Align the destination with 1-3 byte copies. */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* the source is aligned as well */

	/*
	 * Source is not word aligned (destination is).  Read whole aligned
	 * words from the rounded-down source and splice neighbouring words
	 * together with shifts; lr always carries the most recently loaded
	 * word.  r12 = (source & 3) selects the 1-, 2- or 3-byte variant.
	 * NOTE(review): the lsr/lsl pairing looks like it assumes
	 * little-endian byte order - confirm.
	 */
.Lmemcpy_srcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3
	beq	.Lmemcpy_srcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* Source offset 1: 16 bytes per pass. */
.Lmemcpy_srcul1loop16:
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul1l4

	/* Source offset 1: 4 bytes per pass. */
.Lmemcpy_srcul1loop4:
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	sub	r1, r1, #3		/* step r1 back 3 bytes for the byte tail */
	b	.Lmemcpy_l4

.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* Source offset 2: 16 bytes per pass. */
.Lmemcpy_srcul2loop16:
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul2l4

	/* Source offset 2: 4 bytes per pass. */
.Lmemcpy_srcul2loop4:
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	sub	r1, r1, #2		/* step r1 back 2 bytes for the byte tail */
	b	.Lmemcpy_l4

.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* Source offset 3: 16 bytes per pass. */
.Lmemcpy_srcul3loop16:
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul3l4

	/* Source offset 3: 4 bytes per pass. */
.Lmemcpy_srcul3loop4:
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	sub	r1, r1, #1		/* step r1 back 1 byte for the byte tail */
	b	.Lmemcpy_l4
END(memcpy)
|
||||
|
||||
#else
|
||||
/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
|
||||
ENTRY(memcpy)
|
||||
pld [r1]
|
||||
@ -2398,23 +2120,3 @@ ENTRY(memcpy)
|
||||
strb r1, [r0, #0x0b]
|
||||
RET
|
||||
END(memcpy)
|
||||
#endif /* _ARM_ARCH_5E */
|
||||
|
||||
#ifdef GPROF

/*
 * Five global entry points, each consisting of a single nop, assembled
 * only when GPROF is defined.
 * NOTE(review): presumably these are address markers used by the kernel
 * profiling code - confirm against its users.
 */

ENTRY(user)
	nop
END(user)

ENTRY(btrap)
	nop
END(btrap)

ENTRY(etrap)
	nop
END(etrap)

ENTRY(bintr)
	nop
END(bintr)

ENTRY(eintr)
	nop
END(eintr)

#endif
|
||||
|
Loading…
Reference in New Issue
Block a user