Use MFENCE to serialize RDTSC on non-Intel CPUs.

The kernel already uses the stronger barrier instruction on AMD CPUs;
correct the userspace fast gettimeofday() implementation as well.

Sponsored by:	The FreeBSD Foundation
MFC after:	2 weeks
Differential revision:	https://reviews.freebsd.org/D11728
This commit is contained in:
Konstantin Belousov 2017-07-27 08:37:07 +00:00
parent dd269a0bf4
commit 9a3083660d

View File

@ -52,57 +52,108 @@ __FBSDID("$FreeBSD$");
#endif
#include "libc_private.h"
/*
 * Which memory barrier instruction to issue before RDTSC.  Resolved
 * lazily the first time rdtsc_mb() runs; see init_fence() below.
 */
static enum LMB {
	LMB_UNKNOWN,	/* not probed yet */
	LMB_NONE,	/* no SSE2, so no fence instruction exists */
	LMB_MFENCE,	/* non-Intel CPU: MFENCE required to serialize RDTSC */
	LMB_LFENCE	/* Intel CPU: LFENCE is sufficient */
} lfence_works = LMB_UNKNOWN;
/*
 * Execute CPUID with the given leaf and store EAX/EBX/ECX/EDX into
 * p[0..3].
 *
 * On i386, %ebx may be reserved as the PIC base register, so it is
 * saved/restored by hand and EBX is moved out through a scratch
 * register ("=r") instead of using a "=b" output constraint.
 * NOTE(review): the "=r" output could in principle be allocated %ebx
 * itself in non-PIC builds, in which case the popl would clobber it —
 * presumably never happens in practice here; verify against toolchain.
 */
static void
cpuidp(u_int leaf, u_int p[4])
{

	__asm __volatile(
#if defined(__i386__)
	    "	pushl	%%ebx\n"
#endif
	    "	cpuid\n"
#if defined(__i386__)
	    "	movl	%%ebx,%1\n"
	    "	popl	%%ebx"
#endif
	    : "=a" (p[0]),
#if defined(__i386__)
	    "=r" (p[1]),
#elif defined(__amd64__)
	    "=b" (p[1]),
#else
#error "Arch"
#endif
	    "=c" (p[2]), "=d" (p[3])
	    : "0" (leaf));
}
/*
 * Pick the barrier by CPU vendor: Intel documents that LFENCE
 * serializes RDTSC, everything else gets the stronger MFENCE.
 *
 * The comparison string looks misspelled on purpose: cpuidp() stores
 * the leaf-0 registers in EBX, ECX, EDX order, which scrambles the
 * vendor string "GenuineIntel" into "Genu" "ntel" "ineI".
 */
static enum LMB
select_lmb(void)
{
	static const char intel_id[] = "GenuntelineI";
	u_int regs[4];

	cpuidp(0, regs);
	if (memcmp(regs + 1, intel_id, sizeof(intel_id) - 1) == 0)
		return (LMB_LFENCE);
	return (LMB_MFENCE);
}
static void
init_fence(void)
{ {
#if defined(__i386__) #if defined(__i386__)
static int lfence_works = -1;
u_int cpuid_supported, p[4]; u_int cpuid_supported, p[4];
if (lfence_works == -1) { __asm __volatile(
__asm __volatile( " pushfl\n"
" pushfl\n" " popl %%eax\n"
" popl %%eax\n" " movl %%eax,%%ecx\n"
" movl %%eax,%%ecx\n" " xorl $0x200000,%%eax\n"
" xorl $0x200000,%%eax\n" " pushl %%eax\n"
" pushl %%eax\n" " popfl\n"
" popfl\n" " pushfl\n"
" pushfl\n" " popl %%eax\n"
" popl %%eax\n" " xorl %%eax,%%ecx\n"
" xorl %%eax,%%ecx\n" " je 1f\n"
" je 1f\n" " movl $1,%0\n"
" movl $1,%0\n" " jmp 2f\n"
" jmp 2f\n" "1: movl $0,%0\n"
"1: movl $0,%0\n" "2:\n"
"2:\n" : "=r" (cpuid_supported) : : "eax", "ecx", "cc");
: "=r" (cpuid_supported) : : "eax", "ecx", "cc"); if (cpuid_supported) {
if (cpuid_supported) { cpuidp(0x1, p);
__asm __volatile( if ((p[3] & CPUID_SSE2) != 0)
" pushl %%ebx\n" lfence_works = select_lmb();
" cpuid\n" } else
" movl %%ebx,%1\n" lfence_works = LMB_NONE;
" popl %%ebx\n"
: "=a" (p[0]), "=r" (p[1]), "=c" (p[2]), "=d" (p[3])
: "0" (0x1));
lfence_works = (p[3] & CPUID_SSE2) != 0;
} else
lfence_works = 0;
}
if (lfence_works == 1)
lfence();
#elif defined(__amd64__) #elif defined(__amd64__)
lfence(); lfence_works = select_lmb();
#else #else
#error "arch" #error "Arch"
#endif #endif
} }
/*
 * Issue the memory barrier required to serialize earlier memory
 * accesses against the RDTSC executed right after this call.  The
 * barrier kind is resolved lazily on first use.
 */
static void
rdtsc_mb(void)
{

	for (;;) {
		switch (lfence_works) {
		case LMB_LFENCE:
			lfence();
			return;
		case LMB_MFENCE:
			mfence();
			return;
		case LMB_NONE:
			return;
		default:
			/* First call: probe the CPU, then re-dispatch. */
			init_fence();
			break;
		}
	}
}
static u_int static u_int
__vdso_gettc_rdtsc_low(const struct vdso_timehands *th) __vdso_gettc_rdtsc_low(const struct vdso_timehands *th)
{ {
u_int rv; u_int rv;
lfence_mb(); rdtsc_mb();
__asm __volatile("rdtsc; shrd %%cl, %%edx, %0" __asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
: "=a" (rv) : "c" (th->th_x86_shift) : "edx"); : "=a" (rv) : "c" (th->th_x86_shift) : "edx");
return (rv); return (rv);
@ -112,7 +163,7 @@ static u_int
__vdso_rdtsc32(void) __vdso_rdtsc32(void)
{ {
lfence_mb(); rdtsc_mb();
return (rdtsc32()); return (rdtsc32());
} }
@ -212,7 +263,7 @@ __vdso_hyperv_tsc(struct hyperv_reftsc *tsc_ref, u_int *tc)
scale = tsc_ref->tsc_scale; scale = tsc_ref->tsc_scale;
ofs = tsc_ref->tsc_ofs; ofs = tsc_ref->tsc_ofs;
lfence_mb(); rdtsc_mb();
tsc = rdtsc(); tsc = rdtsc();
/* ret = ((tsc * scale) >> 64) + ofs */ /* ret = ((tsc * scale) >> 64) + ofs */