tsc: add RDTSCP or faster variants of get_timecount()

Use RDTSCP in preference to a fenced (LFENCE- or MFENCE-prefixed) RDTSC
if RDTSCP is supported. It is recommended by both Intel and AMD. However,
on AMD Zen and newer CPUs use LFENCE;RDTSC, as recommended by AMD [*].
In particular, this means that AMD CPUs now use a more appropriate fence
instead of the overly harsh MFENCE.

Add comment explaining the intent of the selection logic.

Reported by:	gallatin [*]
Reviewed by:	gallatin, markj
Tested by:	gallatin, pho
MFC after:	1 week
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D27986
This commit is contained in:
Konstantin Belousov 2021-01-05 23:00:14 +02:00
parent 45974de8fb
commit 9e680e4005

View File

@ -97,6 +97,8 @@ static u_int tsc_get_timecount_lfence(struct timecounter *tc);
/*
 * Timecounter read routines.  init_TSC_tc() installs the "_low" flavour of
 * a routine when the computed shift is non-zero.
 */
static u_int tsc_get_timecount_low_lfence(struct timecounter *tc);
static u_int tsc_get_timecount_mfence(struct timecounter *tc);
static u_int tsc_get_timecount_low_mfence(struct timecounter *tc);
/* RDTSCP-based read routines. */
static u_int tscp_get_timecount(struct timecounter *tc);
static u_int tscp_get_timecount_low(struct timecounter *tc);
static void tsc_levels_changed(void *arg, int unit);
static uint32_t x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th,
struct timecounter *tc);
@ -628,7 +630,25 @@ init_TSC_tc(void)
init:
for (shift = 0; shift <= 31 && (tsc_freq >> shift) > max_freq; shift++)
;
if ((cpu_feature & CPUID_SSE2) != 0 && mp_ncpus > 1) {
/*
* Timecounter implementation selection, top to bottom:
* - For AMD Zens and newer, use LFENCE;RDTSC.
* - If RDTSCP is available, use RDTSCP.
* - If fence instructions are provided (SSE2) and there is more than
* one CPU, use LFENCE;RDTSC on Intel, and MFENCE;RDTSC on AMD.
* - For really old CPUs, just use RDTSC.
*/
if ((cpu_vendor_id == CPU_VENDOR_AMD ||
cpu_vendor_id == CPU_VENDOR_HYGON) &&
CPUID_TO_FAMILY(cpu_id) >= 0x17) {
tsc_timecounter.tc_get_timecount = shift > 0 ?
tsc_get_timecount_low_lfence :
tsc_get_timecount_lfence;
} else if ((amd_feature & AMDID_RDTSCP) != 0) {
tsc_timecounter.tc_get_timecount = shift > 0 ?
tscp_get_timecount_low : tscp_get_timecount;
} else if ((cpu_feature & CPUID_SSE2) != 0 && mp_ncpus > 1) {
if (cpu_vendor_id == CPU_VENDOR_AMD ||
cpu_vendor_id == CPU_VENDOR_HYGON) {
tsc_timecounter.tc_get_timecount = shift > 0 ?
@ -783,6 +803,13 @@ tsc_get_timecount(struct timecounter *tc __unused)
return (rdtsc32());
}
/*
 * Timecounter read routine based on RDTSCP: hands back whatever
 * rdtscp32() returns.  The timecounter argument is not used.
 */
static u_int
tscp_get_timecount(struct timecounter *tc __unused)
{
	u_int count;

	count = rdtscp32();
	return (count);
}
static inline u_int
tsc_get_timecount_low(struct timecounter *tc)
{
@ -793,6 +820,16 @@ tsc_get_timecount_low(struct timecounter *tc)
return (rv);
}
/*
 * RDTSCP-based timecounter read routine for the scaled-down counter:
 * returns the low 32 bits of the EDX:EAX value produced by RDTSCP,
 * shifted right by the count stored in tc->tc_priv.
 *
 * RDTSCP overwrites %ecx in addition to %eax and %edx, so the shift
 * count cannot be handed in through a "c" input operand; it is loaded
 * from memory after the counter has been read.  Since that load takes
 * place after %eax has already been written, the output operand must be
 * marked earlyclobber ("=&a").  Without it the compiler is allowed to
 * form the address of tc->tc_priv using %rax, which by then holds the
 * TSC low word instead of the pointer.
 */
static u_int
tscp_get_timecount_low(struct timecounter *tc)
{
	uint32_t rv;

	__asm __volatile("rdtscp; movl %1, %%ecx; shrd %%cl, %%edx, %0"
	    : "=&a" (rv) : "m" (tc->tc_priv) : "ecx", "edx");
	return (rv);
}
static u_int
tsc_get_timecount_lfence(struct timecounter *tc __unused)
{