amd64: implement ERMS-based memmove, memcpy and memset

Reviewed by:	kib
Approved by:	re (gjb)
Differential Revision:	https://reviews.freebsd.org/D17124
This commit is contained in:
mjg 2018-09-13 14:53:51 +00:00
parent 0d0ccdbf0d
commit be73db354f
2 changed files with 90 additions and 6 deletions

View File

@ -131,6 +131,7 @@ __FBSDID("$FreeBSD$");
#include <machine/trap.h>
#include <machine/tss.h>
#include <x86/ucode.h>
#include <x86/ifunc.h>
#ifdef SMP
#include <machine/smp.h>
#endif
@ -2661,3 +2662,34 @@ outb_(u_short port, u_char data)
}
#endif /* KDB */
/*
 * Remove any macro definitions of these names so that the DEFINE_IFUNC
 * invocations below see them as plain identifiers rather than macro
 * expansions.
 */
#undef memset
#undef memmove
#undef memcpy
/*
 * memset() implementation selection.
 *
 * The DEFINE_IFUNC body returns the routine to use for memset():
 * memset_erms when the CPUID_STDEXT_ERMS bit is set in
 * cpu_stdext_feature, memset_std otherwise.
 */
void	*memset_std(void *buf, int c, size_t len);
void	*memset_erms(void *buf, int c, size_t len);

DEFINE_IFUNC(, void *, memset,
    (void *, int, size_t), static)
{

	/* No ERMS reported by the CPU: stay with the standard routine. */
	if ((cpu_stdext_feature & CPUID_STDEXT_ERMS) == 0)
		return (memset_std);
	return (memset_erms);
}
/*
 * memmove() implementation selection.
 *
 * The DEFINE_IFUNC body returns the routine to use for memmove():
 * memmove_erms when the CPUID_STDEXT_ERMS bit is set in
 * cpu_stdext_feature, memmove_std otherwise.
 */
void	*memmove_std(void * _Nonnull dst, const void * _Nonnull src,
	    size_t len);
void	*memmove_erms(void * _Nonnull dst, const void * _Nonnull src,
	    size_t len);

DEFINE_IFUNC(, void *, memmove,
    (void * _Nonnull, const void * _Nonnull, size_t), static)
{

	/* No ERMS reported by the CPU: stay with the standard routine. */
	if ((cpu_stdext_feature & CPUID_STDEXT_ERMS) == 0)
		return (memmove_std);
	return (memmove_erms);
}
/*
 * memcpy() implementation selection.
 *
 * The DEFINE_IFUNC body returns the routine to use for memcpy():
 * memcpy_erms when the CPUID_STDEXT_ERMS bit is set in
 * cpu_stdext_feature, memcpy_std otherwise.
 */
void	*memcpy_std(void * _Nonnull dst, const void * _Nonnull src,
	    size_t len);
void	*memcpy_erms(void * _Nonnull dst, const void * _Nonnull src,
	    size_t len);

DEFINE_IFUNC(, void *, memcpy,
    (void * _Nonnull, const void * _Nonnull, size_t), static)
{

	/* No ERMS reported by the CPU: stay with the standard routine. */
	if ((cpu_stdext_feature & CPUID_STDEXT_ERMS) == 0)
		return (memcpy_std);
	return (memcpy_erms);
}

View File

@ -96,7 +96,7 @@ END(sse2_pagezero)
* Adapted from bcopy written by:
* ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
*/
ENTRY(memmove)
ENTRY(memmove_std)
PUSH_FRAME_POINTER
movq %rdi,%r9
movq %rdx,%rcx
@ -142,7 +142,37 @@ ENTRY(memmove)
movq %r9,%rax
POP_FRAME_POINTER
ret
END(memmove)
END(memmove_std)
/*
 * memmove_erms(dst, src, len)
 *              rdi, rsi, rdx
 *
 * Byte-wise memmove built on "rep movsb".  Copies forward unless dst
 * lies inside the source region, in which case it copies backwards so
 * that source bytes are read before they are overwritten.
 * Returns dst in %rax.
 */
ENTRY(memmove_erms)
PUSH_FRAME_POINTER
/* rep movsb modifies %rdi; keep the original dst in %r9 for the return value. */
movq %rdi,%r9
/* The rep prefix takes its count from %rcx. */
movq %rdx,%rcx
/*
 * Compute dst - src as an unsigned value.  It is below len only when
 * src <= dst < src + len, i.e. when a forward copy would clobber
 * not-yet-copied source bytes.  If dst < src the subtraction wraps to
 * a large value and the forward path is taken.
 */
movq %rdi,%rax
subq %rsi,%rax
cmpq %rcx,%rax /* overlapping && src < dst? */
jb 1f
/* Forward copy of %rcx bytes from (%rsi) to (%rdi). */
rep
movsb
movq %r9,%rax
POP_FRAME_POINTER
ret
1:
/* Advance both pointers to the last byte of their regions (base + len - 1). */
addq %rcx,%rdi /* copy backwards */
addq %rcx,%rsi
decq %rdi
decq %rsi
/* Set the direction flag so movsb walks downwards ... */
std
rep
movsb
/* ... and clear it again before returning. */
cld
movq %r9,%rax
POP_FRAME_POINTER
ret
END(memmove_erms)
/*
* memcpy(dst, src, len)
@ -150,7 +180,7 @@ END(memmove)
*
* Note: memcpy does not support overlapping copies
*/
ENTRY(memcpy)
ENTRY(memcpy_std)
PUSH_FRAME_POINTER
movq %rdi,%rax
movq %rdx,%rcx
@ -167,13 +197,23 @@ ENTRY(memcpy)
movsb
POP_FRAME_POINTER
ret
END(memcpy)
END(memcpy_std)
/*
 * memcpy_erms(dst, src, len)
 *             rdi, rsi, rdx
 *
 * Copies len bytes from src to dst with a single forward "rep movsb".
 * There is no overlap check.  Returns dst in %rax.
 */
ENTRY(memcpy_erms)
PUSH_FRAME_POINTER
/* Load the return value first; rep movsb changes %rdi but not %rax. */
movq %rdi,%rax
/* The rep prefix takes its count from %rcx. */
movq %rdx,%rcx
rep
movsb
POP_FRAME_POINTER
ret
END(memcpy_erms)
/*
* memset(dst, c, len)
* rdi, rsi, rdx
*/
ENTRY(memset)
ENTRY(memset_std)
PUSH_FRAME_POINTER
movq %rdi,%r9
movq %rdx,%rcx
@ -195,7 +235,19 @@ ENTRY(memset)
movq %r9,%rax
POP_FRAME_POINTER
ret
END(memset)
END(memset_std)
/*
 * memset_erms(dst, c, len)
 *             rdi, rsi, rdx
 *
 * Stores the low byte of c into len consecutive bytes starting at dst
 * using "rep stosb".  Returns dst in %rax.
 */
ENTRY(memset_erms)
PUSH_FRAME_POINTER
/*
 * stosb modifies %rdi and takes the fill byte from %al, so the
 * original dst is parked in %r9 until the return value is set up.
 */
movq %rdi,%r9
/* The rep prefix takes its count from %rcx. */
movq %rdx,%rcx
/* Only the low 8 bits of c are used as the fill value. */
movb %sil,%al
rep
stosb
movq %r9,%rax
POP_FRAME_POINTER
ret
END(memset_erms)
/* fillw(pat, base, cnt) */
/* %rdi,%rsi, %rdx */