amd64: replace libkern's memset and memmove with assembly variants
memmove is repurposed bcopy (arguments swapped, return value added). The libkern variant is a wrapper around bcopy, so this is a big improvement. memset is repurposed memcpy. The libkern variant is doing fishy stuff, including branching on 0 and calling bzero. Both functions are rather crude and subject to partial depessimization. This is a soft prerequisite to adding variants utilizing the 'Enhanced REP MOVSB/STOSB' bit and letting the kernel patch at runtime.
This commit is contained in:
parent
5c9b27a2b9
commit
326c556da0
@ -162,6 +162,58 @@ ENTRY(bcopy)
|
||||
END(bcopy)
|
||||
|
||||
/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 *
 * Copies cnt bytes from src to dst and returns the original dst in %rax.
 * The copy runs backwards when dst lies inside [src, src + cnt), so
 * overlapping buffers are handled.
 *
 * Register use: %r9 holds the saved dst, %rdx keeps cnt, %rcx is the
 * rep counter, %rax is scratch until it receives the return value.
 * The forward path issues no cld, so it relies on DF being clear on entry.
 *
 * Original by:
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 */
ENTRY(memmove)
	PUSH_FRAME_POINTER
	movq	%rdi,%r9		/* remember dst for the return value */
	movq	%rdx,%rcx		/* rcx = cnt */

	/*
	 * Unsigned compare: (dst - src) < cnt exactly when dst falls in
	 * [src, src + cnt); a forward copy would then overwrite source
	 * bytes that have not been read yet.
	 */
	movq	%rdi,%rax
	subq	%rsi,%rax
	cmpq	%rcx,%rax
	jb	.Lmemmove_backwards

	/* Forward: whole 64-bit words first, then the 0-7 byte tail. */
	shrq	$3,%rcx			/* rcx = cnt / 8 */
	rep movsq
	movq	%rdx,%rcx
	andq	$7,%rcx			/* rcx = cnt % 8 */
	rep movsb
	movq	%r9,%rax		/* return dst */
	POP_FRAME_POINTER
	ret

	/* ALIGN_TEXT */
.Lmemmove_backwards:
	/* Point both cursors at the last byte of their buffers. */
	leaq	-1(%rdi,%rcx),%rdi	/* rdi = dst + cnt - 1 */
	leaq	-1(%rsi,%rcx),%rsi	/* rsi = src + cnt - 1 */
	andq	$7,%rcx			/* rcx = cnt % 8: trailing bytes go first */
	std				/* string ops now walk downwards */
	rep movsb
	movq	%rdx,%rcx		/* copy the remainder by 64-bit words */
	shrq	$3,%rcx			/* rcx = cnt / 8 */
	subq	$7,%rsi			/* step back to the start of the top word */
	subq	$7,%rdi
	rep movsq
	cld				/* restore DF before returning */
	movq	%r9,%rax		/* return dst */
	POP_FRAME_POINTER
	ret
END(memmove)
|
||||
|
||||
/*
|
||||
* memcpy(dst, src, len)
|
||||
* rdi, rsi, rdx
|
||||
*
|
||||
* Note: memcpy does not support overlapping copies
|
||||
*/
|
||||
ENTRY(memcpy)
|
||||
@ -179,6 +231,27 @@ ENTRY(memcpy)
|
||||
ret
|
||||
END(memcpy)
|
||||
|
||||
/*
 * memset(dst, c, len)
 *        rdi, rsi, rdx
 *
 * Fills len bytes at dst with the low 8 bits of c and returns the
 * original dst in %rax.
 *
 * Register use: %r9 holds the saved dst, %r8 the zero-extended fill
 * byte, %rax the fill pattern, %rcx the rep counter, %rdx keeps len.
 * No cld is issued, so DF is expected to be clear on entry.
 */
ENTRY(memset)
	PUSH_FRAME_POINTER
	movq	%rdi,%r9		/* remember dst for the return value */
	movq	%rdx,%rcx
	/*
	 * stosq writes all 8 bytes of %rax, so the fill byte has to be
	 * replicated into every byte lane.  Storing %rsi as-is would put c
	 * in only one byte out of eight (and the upper bits of an int
	 * argument register are not defined).
	 */
	movzbq	%sil,%r8		/* r8 = (unsigned char)c */
	movabs	$0x0101010101010101,%rax
	imulq	%r8,%rax		/* rax = c repeated in all 8 bytes */
	shrq	$3,%rcx			/* rcx = len / 8 */
	rep
	stosq
	movq	%rdx,%rcx
	andq	$7,%rcx			/* rcx = len % 8: trailing bytes */
	rep
	stosb				/* low byte of rax is still c */
	movq	%r9,%rax		/* return dst */
	POP_FRAME_POINTER
	ret
END(memset)
|
||||
|
||||
/*
|
||||
* pagecopy(%rdi=from, %rsi=to)
|
||||
*/
|
||||
|
@ -620,8 +620,6 @@ isa/vga_isa.c optional vga
|
||||
kern/kern_clocksource.c standard
|
||||
kern/link_elf_obj.c standard
|
||||
libkern/x86/crc32_sse42.c standard
|
||||
libkern/memmove.c standard
|
||||
libkern/memset.c standard
|
||||
#
|
||||
# IA32 binary support
|
||||
#
|
||||
|
Loading…
x
Reference in New Issue
Block a user