hwpstate(4): Ignore CurPstateLimit by default
Add a sysctl knob to allow users to re-enable it, and document the knob and default in cpufreq.4. (While here, add a few unrelated updates to cpufreq.4.) It seems that the register value in some hardware simply reflects the configured P-state. This results in an inadvertent and unintended outcome where the P-state can only walk down, and then the driver becomes "stuck" in the slowest possible P-state. The Linux driver never consults this register, so that's some evidence that ignoring the contents is relatively harmless. PR: 234733 Reported by: sigsys AT gmail.com, Erich Dollanksy <freebsd.ed.lists AT sumeritec.com>
This commit is contained in:
parent
a0dc3f8626
commit
43524989c5
@ -24,7 +24,7 @@
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd January 22, 2020
|
||||
.Dd January 31, 2020
|
||||
.Dt CPUFREQ 4
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -73,9 +73,13 @@ has passed (e.g., the system has cooled sufficiently).
|
||||
If a sysctl cannot be set due to an override condition, it will return
|
||||
.Er EPERM .
|
||||
.Pp
|
||||
The frequency cannot be changed if TSC is in use as the timecounter.
|
||||
The frequency cannot be changed if TSC is in use as the timecounter and the
|
||||
hardware does not support invariant TSC.
|
||||
This is because the timecounter system needs to use a source that has a
|
||||
constant rate.
|
||||
(On invariant TSC hardware, the TSC runs at the P0 rate regardless of the
|
||||
configured P-state.)
|
||||
Modern hardware mostly has invariant TSC.
|
||||
The timecounter source can be changed with the
|
||||
.Pa kern.timecounter.hardware
|
||||
sysctl.
|
||||
@ -105,6 +109,15 @@ some systems.
|
||||
.It Va debug.cpufreq.verbose
|
||||
Print verbose messages.
|
||||
This setting is also accessible via a tunable with the same name.
|
||||
.It Va debug.hwpstate_pstate_limit
|
||||
If enabled, the AMD hwpstate driver limits administrative control of P-states
|
||||
(including by
|
||||
.Xr powerd 8 )
|
||||
to the value in the 0xc0010061 MSR, known as "PStateCurLim[CurPstateLimit]."
|
||||
It is disabled (0) by default.
|
||||
On some hardware, the limit register seems to simply follow the configured
|
||||
P-state, which results in the inability to ever raise the P-state back to P0
|
||||
from a reduced frequency state.
|
||||
.El
|
||||
.Sh SUPPORTED DRIVERS
|
||||
The following device drivers offer absolute frequency control via the
|
||||
@ -112,11 +125,15 @@ The following device drivers offer absolute frequency control via the
|
||||
interface.
|
||||
Usually, only one of these can be active at a time.
|
||||
.Pp
|
||||
.Bl -tag -compact -width ".Pa acpi_perf"
|
||||
.Bl -tag -compact -width ".Pa hwpstate_intel"
|
||||
.It Pa acpi_perf
|
||||
ACPI CPU performance states
|
||||
.It Pa est
|
||||
Intel Enhanced SpeedStep
|
||||
.It Pa hwpstate
|
||||
AMD Cool'n'Quiet2 used in K10 through Family 17h
|
||||
.It Pa hwpstate_intel
|
||||
Intel SpeedShift driver
|
||||
.It Pa ichss
|
||||
Intel SpeedStep for ICH
|
||||
.It Pa powernow
|
||||
|
@ -131,6 +131,12 @@ static int hwpstate_verify;
|
||||
SYSCTL_INT(_debug, OID_AUTO, hwpstate_verify, CTLFLAG_RWTUN,
|
||||
&hwpstate_verify, 0, "Verify P-state after setting");
|
||||
|
||||
static bool hwpstate_pstate_limit;
|
||||
SYSCTL_BOOL(_debug, OID_AUTO, hwpstate_pstate_limit, CTLFLAG_RWTUN,
|
||||
&hwpstate_pstate_limit, 0,
|
||||
"If enabled (1), limit administrative control of P-states to the value in "
|
||||
"CurPstateLimit");
|
||||
|
||||
static device_method_t hwpstate_methods[] = {
|
||||
/* Device interface */
|
||||
DEVMETHOD(device_identify, hwpstate_identify),
|
||||
@ -161,7 +167,8 @@ static driver_t hwpstate_driver = {
|
||||
DRIVER_MODULE(hwpstate, cpu, hwpstate_driver, hwpstate_devclass, 0, 0);
|
||||
|
||||
/*
|
||||
* Go to Px-state on all cpus considering the limit.
|
||||
* Go to Px-state on all cpus, considering the limit register (if so
|
||||
* configured).
|
||||
*/
|
||||
static int
|
||||
hwpstate_goto_pstate(device_t dev, int id)
|
||||
@ -170,15 +177,16 @@ hwpstate_goto_pstate(device_t dev, int id)
|
||||
uint64_t msr;
|
||||
int cpu, i, j, limit;
|
||||
|
||||
if (hwpstate_pstate_limit) {
|
||||
/* get the current pstate limit */
|
||||
msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
|
||||
limit = AMD_10H_11H_GET_PSTATE_LIMIT(msr);
|
||||
if (limit > id) {
|
||||
HWPSTATE_DEBUG(dev,
|
||||
"Restricting requested P%d to P%d due to HW limit\n", id,
|
||||
limit);
|
||||
HWPSTATE_DEBUG(dev, "Restricting requested P%d to P%d "
|
||||
"due to HW limit\n", id, limit);
|
||||
id = limit;
|
||||
}
|
||||
}
|
||||
|
||||
cpu = curcpu;
|
||||
HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, cpu);
|
||||
|
Loading…
x
Reference in New Issue
Block a user