amd64: replace libkern's memset and memmove with assembly variants

memmove is repurposed bcopy (arguments swapped, return value added).
The libkern variant is a wrapper around bcopy, so this is a big
improvement.

memset is repurposed memcpy. The libkern variant is doing fishy stuff,
including branching on 0 and calling bzero.

Both functions are rather crude and subject to partial depessimization.

This is a soft prerequisite to adding variants utilizing the
'Enhanced REP MOVSB/STOSB' bit and letting the kernel patch them in at runtime.
This commit is contained in:
mjg 2018-05-07 15:07:28 +00:00
parent 5c9b27a2b9
commit 326c556da0
2 changed files with 73 additions and 2 deletions

View File

@ -162,6 +162,58 @@ ENTRY(bcopy)
END(bcopy)
/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 *
 * Copies cnt bytes from src to dst and returns dst in %rax.  The
 * regions may overlap: when dst lies inside [src, src + cnt) the copy
 * runs from the highest address downwards so that source bytes are read
 * before they are overwritten; otherwise it runs upwards.
 *
 * Scratch registers used: %rax, %rcx, %rsi, %rdi, %r9.
 *
 * Original by:
 *	ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 */
ENTRY(memmove)
	PUSH_FRAME_POINTER
	movq	%rdx,%rcx			/* rcx = cnt */
	movq	%rdi,%r9			/* keep dst for the return value */

	/*
	 * Unsigned (dst - src) < cnt holds exactly when src < dst and the
	 * two regions overlap; only then is a descending copy required.
	 */
	movq	%rdi,%rax
	subq	%rsi,%rax
	cmpq	%rcx,%rax
	jb	.Lmemmove_backwards

	/* Ascending copy: cnt / 8 quadwords, then the cnt % 8 tail bytes. */
	shrq	$3,%rcx
	rep movsq
	movq	%rdx,%rcx
	andq	$7,%rcx
	rep movsb

	movq	%r9,%rax
	POP_FRAME_POINTER
	ret

	/* ALIGN_TEXT */
.Lmemmove_backwards:
	/* Point both cursors at the last byte of their region. */
	leaq	-1(%rdi,%rcx),%rdi
	leaq	-1(%rsi,%rcx),%rsi

	/* Descending copy: the cnt % 8 trailing bytes go first. */
	andq	$7,%rcx
	std					/* string ops now decrement rsi/rdi */
	rep movsb

	/*
	 * The rest is moved as 64-bit words.  The cursors sit on the top
	 * byte of the next quadword; step back 7 to reach its lowest byte.
	 */
	movq	%rdx,%rcx
	shrq	$3,%rcx
	subq	$7,%rsi
	subq	$7,%rdi
	rep movsq
	cld					/* restore the direction flag */

	movq	%r9,%rax
	POP_FRAME_POINTER
	ret
END(memmove)
/*
* memcpy(dst, src, len)
* rdi, rsi, rdx
*
* Note: memcpy does not support overlapping copies
*/
ENTRY(memcpy)
@ -179,6 +231,27 @@ ENTRY(memcpy)
ret
END(memcpy)
/*
 * memset(dst, c, len)
 *        rdi, rsi, rdx
 *
 * Stores the byte value (unsigned char)c into each of the len bytes
 * starting at dst and returns dst in %rax.
 *
 * Only the low 8 bits of c are used.  The byte is replicated across a
 * 64-bit word first, because stosq writes all of %rax; loading c into
 * %rax unexpanded would fill the buffer with c followed by seven bytes
 * of whatever the upper bits of %rsi held.
 *
 * Scratch registers used: %rax, %rcx, %rdi, %r8, %r9.
 */
ENTRY(memset)
	PUSH_FRAME_POINTER
	movq	%rdi,%r9			/* keep dst for the return value */
	movq	%rdx,%rcx
	movzbq	%sil,%r8			/* r8 = (unsigned char)c */
	movabs	$0x0101010101010101,%rax
	imulq	%r8,%rax			/* rax = c copied into all 8 bytes */
	shrq	$3,%rcx				/* fill by 64-bit words */
	rep
	stosq
	movq	%rdx,%rcx
	andq	$7,%rcx				/* any bytes left? */
	rep
	stosb					/* %al still holds the fill byte */
	movq	%r9,%rax
	POP_FRAME_POINTER
	ret
END(memset)
/*
* pagecopy(%rdi=from, %rsi=to)
*/

View File

@ -620,8 +620,6 @@ isa/vga_isa.c optional vga
kern/kern_clocksource.c standard
kern/link_elf_obj.c standard
libkern/x86/crc32_sse42.c standard
libkern/memmove.c standard
libkern/memset.c standard
#
# IA32 binary support
#