Rework acpi_cpu_idle() to select the next idle state before sleeping, not
after.  Unify the paths for all Cx states.  Remove cpu_idle_busy and
instead do the little profiling we need before re-enabling interrupts.
Use one quantum as the estimate for the C1 sleep duration, since the timer
interrupt is the main reason we wake.
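In other words, the state for the next sleep is now derived from how long the
last sleep turned out to be.  A minimal standalone sketch of that selection
policy (plain C; the latency table and values are made up for illustration,
but the 100 us threshold and the downward walk from the lowest allowed state
mirror the diff below):

    #include <stdio.h>

    /* Hypothetical per-state transition latencies in microseconds. */
    static const int trans_lat[] = { 1, 20, 300 };

    /*
     * Pick the next Cx index: default to the deepest allowed state
     * (cx_lowest) and, when the previous sleep was under 100 us, walk
     * down to the deepest state whose latency fits that sleep length.
     */
    static int
    select_cx(int cx_lowest, int prev_sleep)
    {
        int i, next;

        next = cx_lowest;
        if (prev_sleep < 100) {
            for (i = cx_lowest; i >= 0; i--) {
                if (trans_lat[i] <= prev_sleep) {
                    next = i;
                    break;
                }
            }
        }
        return (next);
    }

    int
    main(void)
    {
        /* A 50 us previous sleep rules out the 300 us state; expect 1. */
        printf("next Cx index: %d\n", select_cx(2, 50));
        return (0);
    }

As in the driver, the default only gets overridden when some state's latency
actually fits within the previous sleep.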

While here, change the cx_history sysctl to cx_usage and report statistics
for which idle states were used in terms of percent.  This seems more
intuitive than raw counters.  Remove the acpi_cx_stats structure since it
is no longer used.  Update the man page.
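For instance, the percentage math and its divide-by-zero guard reduce to the
following standalone sketch (plain C; the counter values are invented, but
the seeded sum and integer percentages follow acpi_cpu_usage_sysctl() in the
diff below):

    #include <stdio.h>

    int
    main(void)
    {
        /* Hypothetical Cx hit counters, e.g. for C1/C2/C3. */
        unsigned int stats[] = { 900, 80, 20 };
        unsigned int sum;
        int i;

        /* Start at 1 so an all-zero history cannot divide by zero. */
        sum = 1;
        for (i = 0; i < 3; i++)
            sum += stats[i];
        for (i = 0; i < 3; i++)
            printf("%u%% ", (stats[i] * 100) / sum);
        printf("\n");    /* Prints "89% 7% 1%" for these counters. */
        return (0);
    }

A side effect of seeding the sum with 1 is that the reported percentages
always total slightly under 100.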

Change various types that do not need an explicit size.
njl 2004-06-05 07:02:18 +00:00
parent 9b15e9628d
commit ed0911a65e
2 changed files with 82 additions and 114 deletions

acpi.4

@@ -25,7 +25,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd July 2, 2001
+.Dd June 4, 2004
 .Dt ACPI 4
 .Os
 .Sh NAME
.Sh NAME
@@ -359,10 +359,9 @@ Maximum value for CPU throttling, equal to 100% of the clock rate.
 Get or set the current throttling state, from 1 to
 .Va hw.acpi.cpu.throttle_max .
 This scales back the CPU clock rate and the corresponding power consumption.
-.It Va hw.acpi.cpu.cx_history
-Debugging information listing all sleep states and the number of
-long and short sleeps for each one.
-The counters are reset when
+.It Va hw.acpi.cpu.cx_usage
+Debugging information listing the percent of total usage for each sleep state.
+The values are reset when
 .Va hw.acpi.cpu.cx_lowest
 is modified.
 .It Va hw.acpi.cpu.cx_lowest

acpi_cpu.c

@@ -71,11 +71,6 @@ struct acpi_cx {
 };
 #define MAX_CX_STATES    8
 
-struct acpi_cx_stats {
-    int          long_slp;     /* Count of sleeps >= trans_lat. */
-    int          short_slp;    /* Count of sleeps < trans_lat. */
-};
-
 struct acpi_cpu_softc {
     device_t     cpu_dev;
     ACPI_HANDLE  cpu_handle;
@@ -85,6 +80,7 @@ struct acpi_cpu_softc {
     struct resource *cpu_p_cnt;     /* Throttling control register */
     struct acpi_cx   cpu_cx_states[MAX_CX_STATES];
     int              cpu_cx_count;  /* Number of valid Cx states. */
+    int              cpu_prev_sleep;/* Last idle sleep duration. */
 };
 
 #define CPU_GET_REG(reg, width)                                         \
@@ -124,15 +120,13 @@ static uint32_t cpu_duty_width;
 static uint32_t  cpu_smi_cmd;   /* Value to write to SMI_CMD. */
 static uint8_t   cpu_pstate_cnt;/* Register to take over throttling. */
 static uint8_t   cpu_cst_cnt;   /* Indicate we are _CST aware. */
-static uint32_t  cpu_rid;       /* Driver-wide resource id. */
-static uint32_t  cpu_quirks;    /* Indicate any hardware bugs. */
+static int       cpu_rid;       /* Driver-wide resource id. */
+static int       cpu_quirks;    /* Indicate any hardware bugs. */
 
 /* Runtime state. */
 static int       cpu_cx_count;  /* Number of valid states */
-static uint32_t  cpu_cx_next;   /* State to use for next sleep. */
-static uint32_t  cpu_non_c3;    /* Index of lowest non-C3 state. */
-static struct acpi_cx_stats cpu_cx_stats[MAX_CX_STATES];
-static int       cpu_idle_busy; /* Count of CPUs in acpi_cpu_idle. */
+static int       cpu_non_c3;    /* Index of lowest non-C3 state. */
+static u_int     cpu_cx_stats[MAX_CX_STATES];/* Cx usage history. */
 
 /* Values for sysctl. */
 static uint32_t  cpu_throttle_state;
@@ -164,7 +158,7 @@ static void acpi_cpu_c1(void);
 static void  acpi_cpu_notify(ACPI_HANDLE h, UINT32 notify, void *context);
 static int   acpi_cpu_quirks(struct acpi_cpu_softc *sc);
 static int   acpi_cpu_throttle_sysctl(SYSCTL_HANDLER_ARGS);
-static int   acpi_cpu_history_sysctl(SYSCTL_HANDLER_ARGS);
+static int   acpi_cpu_usage_sysctl(SYSCTL_HANDLER_ARGS);
 static int   acpi_cpu_cx_lowest_sysctl(SYSCTL_HANDLER_ARGS);
 
 static device_method_t acpi_cpu_methods[] = {
@@ -375,12 +369,8 @@ acpi_cpu_shutdown(device_t dev)
     /* Disable any entry to the idle function. */
     cpu_cx_count = 0;
 
-    /* Wait for all processors to exit acpi_cpu_idle(). */
+    /* Signal and wait for all processors to exit acpi_cpu_idle(). */
     smp_rendezvous(NULL, NULL, NULL, NULL);
-#if 0
-    while (cpu_idle_busy > 0)
-#endif
-        DELAY(1);
 
     return_VALUE (0);
 }
@@ -551,6 +541,9 @@ acpi_cpu_cx_probe(struct acpi_cpu_softc *sc)
     if (sc->cpu_cx_count == 0)
         return (ENXIO);
 
+    /* Use initial sleep value of 1 sec. to start with lowest idle state. */
+    sc->cpu_prev_sleep = 1000000;
+
     return (0);
 }
@@ -757,9 +750,9 @@ acpi_cpu_startup_cx()
                     "lowest Cx sleep state to use");
     SYSCTL_ADD_PROC(&acpi_cpu_sysctl_ctx,
                     SYSCTL_CHILDREN(acpi_cpu_sysctl_tree),
-                    OID_AUTO, "cx_history", CTLTYPE_STRING | CTLFLAG_RD,
-                    NULL, 0, acpi_cpu_history_sysctl, "A",
-                    "count of full sleeps for Cx state / short sleeps");
+                    OID_AUTO, "cx_usage", CTLTYPE_STRING | CTLFLAG_RD,
+                    NULL, 0, acpi_cpu_usage_sysctl, "A",
+                    "percent usage for each Cx state");
 
 #ifdef notyet
     /* Signal platform that we can handle _CST notification. */
@@ -771,7 +764,6 @@ acpi_cpu_startup_cx()
 #endif
 
     /* Take over idling from cpu_idle_default(). */
-    cpu_cx_next = cpu_cx_lowest;
     cpu_idle_hook = acpi_cpu_idle;
 }
@@ -819,8 +811,10 @@ acpi_cpu_throttle_set(uint32_t speed)
 }
 
 /*
- * Idle the CPU in the lowest state possible.
- * This function is called with interrupts disabled.
+ * Idle the CPU in the lowest state possible.  This function is called with
+ * interrupts disabled.  Note that once it re-enables interrupts, a task
+ * switch can occur so do not access shared data (i.e. the softc) after
+ * interrupts are re-enabled.
  */
 static void
 acpi_cpu_idle()
@@ -828,7 +822,7 @@ acpi_cpu_idle()
     struct acpi_cpu_softc *sc;
     struct acpi_cx *cx_next;
     uint32_t    start_time, end_time;
-    int         bm_active, i, asleep;
+    int         bm_active, cx_next_idx, i;
 
     /* If disabled, return immediately. */
     if (cpu_cx_count == 0) {
@@ -847,109 +841,83 @@ acpi_cpu_idle()
         return;
     }
 
-    /* Record that a CPU is in the idle function. */
-    atomic_add_int(&cpu_idle_busy, 1);
+    /*
+     * If we slept 100 us or more, use the lowest Cx state.  Otherwise,
+     * find the lowest state that has a latency less than or equal to
+     * the length of our last sleep.
+     */
+    cx_next_idx = cpu_cx_lowest;
+    if (sc->cpu_prev_sleep < 100)
+        for (i = cpu_cx_lowest; i >= 0; i--)
+            if (sc->cpu_cx_states[i].trans_lat <= sc->cpu_prev_sleep) {
+                cx_next_idx = i;
+                break;
+            }
 
     /*
      * Check for bus master activity.  If there was activity, clear
      * the bit and use the lowest non-C3 state.  Note that the USB
      * driver polling for new devices keeps this bit set all the
-     * time if USB is enabled.
+     * time if USB is loaded.
      */
     AcpiGetRegister(ACPI_BITREG_BUS_MASTER_STATUS, &bm_active,
                     ACPI_MTX_DO_NOT_LOCK);
     if (bm_active != 0) {
         AcpiSetRegister(ACPI_BITREG_BUS_MASTER_STATUS, 1,
                         ACPI_MTX_DO_NOT_LOCK);
-        cpu_cx_next = min(cpu_cx_next, cpu_non_c3);
+        cx_next_idx = min(cx_next_idx, cpu_non_c3);
     }
 
-    /* Perform the actual sleep based on the Cx-specific semantics. */
-    cx_next = &sc->cpu_cx_states[cpu_cx_next];
-    switch (cx_next->type) {
-    case ACPI_STATE_C0:
-        panic("acpi_cpu_idle: attempting to sleep in C0");
-        /* NOTREACHED */
-    case ACPI_STATE_C1:
-        /* Execute HLT (or equivalent) and wait for an interrupt. */
+    /* Select the next state and update statistics. */
+    cx_next = &sc->cpu_cx_states[cx_next_idx];
+    cpu_cx_stats[cx_next_idx]++;
+    KASSERT(cx_next->type != ACPI_STATE_C0, ("acpi_cpu_idle: C0 sleep"));
+
+    /*
+     * Execute HLT (or equivalent) and wait for an interrupt.  We can't
+     * calculate the time spent in C1 since the place we wake up is an
+     * ISR.  Assume we slept one quantum and return.
+     */
+    if (cx_next->type == ACPI_STATE_C1) {
+        sc->cpu_prev_sleep = 1000000 / hz;
         acpi_cpu_c1();
+        return;
+    }
 
-        /*
-         * We can't calculate the time spent in C1 since the place we
-         * wake up is an ISR.  Use a constant time of 1 ms.
-         */
-        start_time = 0;
-        end_time = 1000;
-        break;
-    case ACPI_STATE_C2:
-        /*
-         * Read from P_LVLx to enter C2, checking time spent asleep.
-         * Use the ACPI timer for measuring sleep time.  Since we need to
-         * get the time very close to the CPU start/stop clock logic, this
-         * is the only reliable time source.
-         */
-        AcpiHwLowLevelRead(32, &start_time, &AcpiGbl_FADT->XPmTmrBlk);
-        CPU_GET_REG(cx_next->p_lvlx, 1);
-
-        /*
-         * Read the end time twice.  Since it may take an arbitrary time
-         * to enter the idle state, the first read may be executed before
-         * the processor has stopped.  Doing it again provides enough
-         * margin that we are certain to have a correct value.
-         */
-        AcpiHwLowLevelRead(32, &end_time, &AcpiGbl_FADT->XPmTmrBlk);
-        AcpiHwLowLevelRead(32, &end_time, &AcpiGbl_FADT->XPmTmrBlk);
-        ACPI_ENABLE_IRQS();
-        break;
-    case ACPI_STATE_C3:
-    default:
-        /* Disable bus master arbitration and enable bus master wakeup. */
+    /* For C3, disable bus master arbitration and enable bus master wake. */
+    if (cx_next->type == ACPI_STATE_C3) {
         AcpiSetRegister(ACPI_BITREG_ARB_DISABLE, 1, ACPI_MTX_DO_NOT_LOCK);
         AcpiSetRegister(ACPI_BITREG_BUS_MASTER_RLD, 1, ACPI_MTX_DO_NOT_LOCK);
+    }
 
-        /* Read from P_LVLx to enter C3, checking time spent asleep. */
-        AcpiHwLowLevelRead(32, &start_time, &AcpiGbl_FADT->XPmTmrBlk);
-        CPU_GET_REG(cx_next->p_lvlx, 1);
+    /*
+     * Read from P_LVLx to enter C2(+), checking time spent asleep.
+     * Use the ACPI timer for measuring sleep time.  Since we need to
+     * get the time very close to the CPU start/stop clock logic, this
+     * is the only reliable time source.
+     */
+    AcpiHwLowLevelRead(32, &start_time, &AcpiGbl_FADT->XPmTmrBlk);
+    CPU_GET_REG(cx_next->p_lvlx, 1);
 
-        /* Read the end time twice.  See comment for C2 above. */
-        AcpiHwLowLevelRead(32, &end_time, &AcpiGbl_FADT->XPmTmrBlk);
-        AcpiHwLowLevelRead(32, &end_time, &AcpiGbl_FADT->XPmTmrBlk);
+    /*
+     * Read the end time twice.  Since it may take an arbitrary time
+     * to enter the idle state, the first read may be executed before
+     * the processor has stopped.  Doing it again provides enough
+     * margin that we are certain to have a correct value.
+     */
+    AcpiHwLowLevelRead(32, &end_time, &AcpiGbl_FADT->XPmTmrBlk);
+    AcpiHwLowLevelRead(32, &end_time, &AcpiGbl_FADT->XPmTmrBlk);
 
-        /* Enable bus master arbitration and disable bus master wakeup. */
+    /* Enable bus master arbitration and disable bus master wakeup. */
+    if (cx_next->type == ACPI_STATE_C3) {
         AcpiSetRegister(ACPI_BITREG_ARB_DISABLE, 0, ACPI_MTX_DO_NOT_LOCK);
         AcpiSetRegister(ACPI_BITREG_BUS_MASTER_RLD, 0, ACPI_MTX_DO_NOT_LOCK);
-        ACPI_ENABLE_IRQS();
-        break;
     }
 
     /* Find the actual time asleep in microseconds, minus overhead. */
     end_time = acpi_TimerDelta(end_time, start_time);
-    asleep = PM_USEC(end_time) - cx_next->trans_lat;
-
-    /* Record statistics */
-    if (asleep < cx_next->trans_lat)
-        cpu_cx_stats[cpu_cx_next].short_slp++;
-    else
-        cpu_cx_stats[cpu_cx_next].long_slp++;
-
-    /*
-     * If we slept 100 us or more, use the lowest Cx state.
-     * Otherwise, find the lowest state that has a latency less than
-     * or equal to the length of our last sleep.
-     */
-    if (asleep >= 100)
-        cpu_cx_next = cpu_cx_lowest;
-    else {
-        for (i = cpu_cx_lowest; i >= 0; i--) {
-            if (sc->cpu_cx_states[i].trans_lat <= asleep) {
-                cpu_cx_next = i;
-                break;
-            }
-        }
-    }
-
-    /* Decrement reference count checked by acpi_cpu_shutdown(). */
-    atomic_subtract_int(&cpu_idle_busy, 1);
+    sc->cpu_prev_sleep = PM_USEC(end_time) - cx_next->trans_lat;
+    ACPI_ENABLE_IRQS();
 }
 
 /* Put the CPU in C1 in a machine-dependant way. */
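Aside on the timing math above: acpi_TimerDelta() and PM_USEC(), defined
elsewhere in this driver, convert the pair of PM-timer reads into a
microsecond count.  A rough standalone illustration of the idea, assuming a
24-bit free-running timer at the standard ACPI rate of 3.579545 MHz; this
sketches the concept only, not the driver's actual implementations:

    #include <stdint.h>
    #include <stdio.h>

    #define PM_TIMER_FREQ    3579545    /* ACPI PM timer rate in Hz. */

    /* Wrap-safe delta between two reads of a 24-bit free-running counter. */
    static uint32_t
    timer_delta(uint32_t end, uint32_t start)
    {
        return ((end - start) & 0x00ffffff);
    }

    /* Convert PM-timer ticks to microseconds. */
    static uint32_t
    ticks_to_usec(uint32_t ticks)
    {
        return ((uint64_t)ticks * 1000000 / PM_TIMER_FREQ);
    }

    int
    main(void)
    {
        /* A sleep that wraps the counter still yields a sane delta. */
        uint32_t d = timer_delta(0x000010, 0xfffff0);

        printf("%u ticks = %u us\n", d, ticks_to_usec(d));
        return (0);
    }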
@@ -1073,17 +1041,20 @@ acpi_cpu_throttle_sysctl(SYSCTL_HANDLER_ARGS)
 }
 
 static int
-acpi_cpu_history_sysctl(SYSCTL_HANDLER_ARGS)
+acpi_cpu_usage_sysctl(SYSCTL_HANDLER_ARGS)
 {
     struct sbuf  sb;
     char         buf[128];
     int          i;
+    u_int        sum;
+
+    /* Avoid divide by 0 potential error. */
+    sum = 1;
+    for (i = 0; i < cpu_cx_count; i++)
+        sum += cpu_cx_stats[i];
 
     sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
-    for (i = 0; i < cpu_cx_count; i++) {
-        sbuf_printf(&sb, "%u/%u ", cpu_cx_stats[i].long_slp,
-                    cpu_cx_stats[i].short_slp);
-    }
+    for (i = 0; i < cpu_cx_count; i++)
+        sbuf_printf(&sb, "%u%% ", (cpu_cx_stats[i] * 100) / sum);
     sbuf_trim(&sb);
     sbuf_finish(&sb);
     sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
@@ -1110,9 +1081,7 @@ acpi_cpu_cx_lowest_sysctl(SYSCTL_HANDLER_ARGS)
     if (val < 0 || val > cpu_cx_count - 1)
         return (EINVAL);
 
-    /* Use the new value for the next idle slice. */
     cpu_cx_lowest = val;
-    cpu_cx_next = val;
 
     /* If not disabling, cache the new lowest non-C3 state. */
     cpu_non_c3 = 0;
@@ -1124,7 +1093,7 @@ acpi_cpu_cx_lowest_sysctl(SYSCTL_HANDLER_ARGS)
     }
 
     /* Reset the statistics counters. */
-    memset(cpu_cx_stats, 0, sizeof(cpu_cx_stats));
+    bzero(cpu_cx_stats, sizeof(cpu_cx_stats));
 
     return (0);
 }