diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 07a42e6f0f71..ace4cdfaa1a4 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -545,23 +545,20 @@ cpu_flush_dcache(void *ptr, size_t len) int cpu_est_clockrate(int cpu_id, uint64_t *rate) { + uint64_t tsc1, tsc2; + uint64_t acnt, mcnt; register_t reg; - uint64_t freq, tsc1, tsc2; if (pcpu_find(cpu_id) == NULL || rate == NULL) return (EINVAL); - freq = atomic_load_acq_64(&tsc_freq); - /* If TSC is P-state invariant, DELAY(9) based logic fails. */ - if (tsc_is_invariant && freq != 0) + /* + * If TSC is P-state invariant and APERF/MPERF MSRs do not exist, + * DELAY(9) based logic fails. + */ + if (tsc_is_invariant && !tsc_perf_stat) return (EOPNOTSUPP); - /* If we're booting, trust the rate calibrated moments ago. */ - if (cold && freq != 0) { - *rate = freq; - return (0); - } - #ifdef SMP if (smp_cpus > 1) { /* Schedule ourselves on the indicated cpu. */ @@ -573,10 +570,23 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate) /* Calibrate by measuring a short delay. */ reg = intr_disable(); - tsc1 = rdtsc(); - DELAY(1000); - tsc2 = rdtsc(); - intr_restore(reg); + if (tsc_is_invariant) { + wrmsr(MSR_MPERF, 0); + wrmsr(MSR_APERF, 0); + tsc1 = rdtsc(); + DELAY(1000); + mcnt = rdmsr(MSR_MPERF); + acnt = rdmsr(MSR_APERF); + tsc2 = rdtsc(); + intr_restore(reg); + *rate = (tsc2 - tsc1) / 1000 * acnt / mcnt * 1000000; + } else { + tsc1 = rdtsc(); + DELAY(1000); + tsc2 = rdtsc(); + intr_restore(reg); + *rate = (tsc2 - tsc1) * 1000; + } #ifdef SMP if (smp_cpus > 1) { @@ -586,17 +596,6 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate) } #endif - tsc2 -= tsc1; - if (freq != 0) { - *rate = tsc2 * 1000; - return (0); - } - - /* - * Subtract 0.5% of the total. Empirical testing has shown that - * overhead in DELAY() works out to approximately this value. - */ - *rate = tsc2 * 1000 - tsc2 * 5; return (0); } diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 5869c2191ef6..50ca5fd836e4 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -1136,25 +1136,22 @@ cpu_flush_dcache(void *ptr, size_t len) int cpu_est_clockrate(int cpu_id, uint64_t *rate) { + uint64_t tsc1, tsc2; + uint64_t acnt, mcnt; register_t reg; - uint64_t freq, tsc1, tsc2; if (pcpu_find(cpu_id) == NULL || rate == NULL) return (EINVAL); if ((cpu_feature & CPUID_TSC) == 0) return (EOPNOTSUPP); - freq = atomic_load_acq_64(&tsc_freq); - /* If TSC is P-state invariant, DELAY(9) based logic fails. */ - if (tsc_is_invariant && freq != 0) + /* + * If TSC is P-state invariant and APERF/MPERF MSRs do not exist, + * DELAY(9) based logic fails. + */ + if (tsc_is_invariant && !tsc_perf_stat) return (EOPNOTSUPP); - /* If we're booting, trust the rate calibrated moments ago. */ - if (cold && freq != 0) { - *rate = freq; - return (0); - } - #ifdef SMP if (smp_cpus > 1) { /* Schedule ourselves on the indicated cpu. */ @@ -1166,10 +1163,23 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate) /* Calibrate by measuring a short delay. */ reg = intr_disable(); - tsc1 = rdtsc(); - DELAY(1000); - tsc2 = rdtsc(); - intr_restore(reg); + if (tsc_is_invariant) { + wrmsr(MSR_MPERF, 0); + wrmsr(MSR_APERF, 0); + tsc1 = rdtsc(); + DELAY(1000); + mcnt = rdmsr(MSR_MPERF); + acnt = rdmsr(MSR_APERF); + tsc2 = rdtsc(); + intr_restore(reg); + *rate = (tsc2 - tsc1) / 1000 * acnt / mcnt * 1000000; + } else { + tsc1 = rdtsc(); + DELAY(1000); + tsc2 = rdtsc(); + intr_restore(reg); + *rate = (tsc2 - tsc1) * 1000; + } #ifdef SMP if (smp_cpus > 1) { @@ -1179,17 +1189,6 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate) } #endif - tsc2 -= tsc1; - if (freq != 0) { - *rate = tsc2 * 1000; - return (0); - } - - /* - * Subtract 0.5% of the total. Empirical testing has shown that - * overhead in DELAY() works out to approximately this value. - */ - *rate = tsc2 * 1000 - tsc2 * 5; return (0); } diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c index 4727351c84c8..853ac69911b2 100644 --- a/sys/pc98/pc98/machdep.c +++ b/sys/pc98/pc98/machdep.c @@ -1071,20 +1071,13 @@ cpu_flush_dcache(void *ptr, size_t len) int cpu_est_clockrate(int cpu_id, uint64_t *rate) { + uint64_t tsc1, tsc2; register_t reg; - uint64_t freq, tsc1, tsc2; if (pcpu_find(cpu_id) == NULL || rate == NULL) return (EINVAL); if ((cpu_feature & CPUID_TSC) == 0) return (EOPNOTSUPP); - freq = atomic_load_acq_64(&tsc_freq); - - /* If we're booting, trust the rate calibrated moments ago. */ - if (cold && freq != 0) { - *rate = freq; - return (0); - } #ifdef SMP if (smp_cpus > 1) { @@ -1101,6 +1094,7 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate) DELAY(1000); tsc2 = rdtsc(); intr_restore(reg); + *rate = (tsc2 - tsc1) * 1000; #ifdef SMP if (smp_cpus > 1) { @@ -1110,17 +1104,6 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate) } #endif - tsc2 -= tsc1; - if (freq != 0) { - *rate = tsc2 * 1000; - return (0); - } - - /* - * Subtract 0.5% of the total. Empirical testing has shown that - * overhead in DELAY() works out to approximately this value. - */ - *rate = tsc2 * 1000 - tsc2 * 5; return (0); }