Add a tunable "debug.hwpstate_verify" to check the P-state after changing it,
and turn it off by default.  It is very inefficient to verify the current
P-state of each core, especially for CPUs with many cores.  According to the
manual, when multiple commands are requested to the same power domain before
completion of pending transitions, the last command is executed.  Because
requests are serialized by the caller, all cores will receive the same command
for each call.  Do not call sched_bind() and sched_unbind().  It is redundant
because the caller does it anyway.
This commit is contained in:
Jung-uk Kim 2017-11-30 01:40:07 +00:00
parent 72b27e9773
commit e374a321fe

View File

@ -123,10 +123,14 @@ static int hwpstate_get_info_from_acpi_perf(device_t dev, device_t perf_dev);
static int hwpstate_get_info_from_msr(device_t dev);
static int hwpstate_goto_pstate(device_t dev, int pstate_id);
static int hwpstate_verbose = 0;
static int hwpstate_verbose;
SYSCTL_INT(_debug, OID_AUTO, hwpstate_verbose, CTLFLAG_RWTUN,
&hwpstate_verbose, 0, "Debug hwpstate");
static int hwpstate_verify;
SYSCTL_INT(_debug, OID_AUTO, hwpstate_verify, CTLFLAG_RWTUN,
&hwpstate_verify, 0, "Verify P-state after setting");
static device_method_t hwpstate_methods[] = {
/* Device interface */
DEVMETHOD(device_identify, hwpstate_identify),
@ -160,15 +164,13 @@ DRIVER_MODULE(hwpstate, cpu, hwpstate_driver, hwpstate_devclass, 0, 0);
* Go to Px-state on all cpus considering the limit.
*/
static int
hwpstate_goto_pstate(device_t dev, int pstate)
hwpstate_goto_pstate(device_t dev, int id)
{
sbintime_t sbt;
int i;
uint64_t msr;
int j;
int limit;
int id = pstate;
int error;
/* get the current pstate limit */
msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
@ -176,47 +178,57 @@ hwpstate_goto_pstate(device_t dev, int pstate)
if (limit > id)
id = limit;
HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id,
PCPU_GET(cpuid));
/* Go To Px-state */
wrmsr(MSR_AMD_10H_11H_CONTROL, id);
/*
* We are going to the same Px-state on all cpus.
* Probably should take _PSD into account.
*/
error = 0;
CPU_FOREACH(i) {
if (i == PCPU_GET(cpuid))
continue;
/* Bind to each cpu. */
thread_lock(curthread);
sched_bind(curthread, i);
thread_unlock(curthread);
HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n",
id, PCPU_GET(cpuid));
HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, i);
/* Go To Px-state */
wrmsr(MSR_AMD_10H_11H_CONTROL, id);
}
CPU_FOREACH(i) {
/* Bind to each cpu. */
thread_lock(curthread);
sched_bind(curthread, i);
thread_unlock(curthread);
/* wait loop (100*100 usec is enough ?) */
for (j = 0; j < 100; j++) {
/* get the result. not assure msr=id */
msr = rdmsr(MSR_AMD_10H_11H_STATUS);
if (msr == id)
break;
sbt = SBT_1MS / 10;
tsleep_sbt(dev, PZERO, "pstate_goto", sbt,
sbt >> tc_precexp, 0);
}
HWPSTATE_DEBUG(dev, "result: P%d-state on cpu%d\n",
(int)msr, PCPU_GET(cpuid));
if (msr != id) {
HWPSTATE_DEBUG(dev, "error: loop is not enough.\n");
error = ENXIO;
/*
* Verify whether each core is in the requested P-state.
*/
if (hwpstate_verify) {
CPU_FOREACH(i) {
thread_lock(curthread);
sched_bind(curthread, i);
thread_unlock(curthread);
/* wait loop (100*100 usec is enough ?) */
for (j = 0; j < 100; j++) {
/* get the result. not assure msr=id */
msr = rdmsr(MSR_AMD_10H_11H_STATUS);
if (msr == id)
break;
sbt = SBT_1MS / 10;
tsleep_sbt(dev, PZERO, "pstate_goto", sbt,
sbt >> tc_precexp, 0);
}
HWPSTATE_DEBUG(dev, "result: P%d-state on cpu%d\n",
(int)msr, i);
if (msr != id) {
HWPSTATE_DEBUG(dev,
"error: loop is not enough.\n");
return (ENXIO);
}
}
}
thread_lock(curthread);
sched_unbind(curthread);
thread_unlock(curthread);
return (error);
return (0);
}
static int