If x86 CPU implementation of the MWAIT instruction reasonably

interacts with interrupts, query ACPI and use MWAIT for entrance into
Cx sleep states.  Support C1 "I/O then halt" mode.  See Intel's
document 302223-007 "Intel® Processor Vendor-Specific ACPI Interface
Specification" for description.

Move the acpi_cpu_c1() function into x86/cpu_machdep.c and use
it instead of inlining "sti; hlt" sequence in several places.

In the acpi(4) man page, besides documenting the dev.cpu.N.cx_method
sysctl, correct the names for dev.cpu.N.{cx_usage,cx_lowest,cx_supported}
sysctls.

Both jkim and avg have some other patches implementing the mwait
functionality; this work is unrelated.  Linux does not rely on the
ACPI to provide correct tables describing Cx modes.  Instead, the
driver has pre-defined knowledge of the CPU models, which was supplied
by Intel.

Tested by:    pho (previous versions)
Sponsored by:	The FreeBSD Foundation
This commit is contained in:
kib 2015-05-09 12:28:48 +00:00
parent 4f7fc385ca
commit 6006bf3a7d
10 changed files with 234 additions and 41 deletions

View File

@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd June 23, 2014
.Dd May 9, 2015
.Dt ACPI 4
.Os
.Sh NAME
@ -69,14 +69,12 @@ them (such as
Enable dumping Debug objects without
.Cd "options ACPI_DEBUG" .
Default is 0, ignore Debug objects.
.It Va hw.acpi.acline
AC line state (1 means online, 0 means on battery power).
.It Va hw.acpi.cpu.cx_usage
.It Va dev.cpu.N.cx_usage
Debugging information listing the percent of total usage for each sleep state.
The values are reset when
.Va hw.acpi.cpu.cx_lowest
.Va dev.cpu.N.cx_lowest
is modified.
.It Va hw.acpi.cpu.cx_lowest
.It Va dev.cpu.N.cx_lowest
Lowest Cx state to use for idling the CPU.
A scheduling algorithm will select states between
.Li C1
@ -111,6 +109,11 @@ semantics as the
state.
Deeper sleeps provide more power savings but increased transition
latency when an interrupt occurs.
.It Va dev.cpu.N.cx_method
List of supported CPU idle states and their transition methods, as
directed by the firmware.
.It Va hw.acpi.acline
AC line state (1 means online, 0 means on battery power).
.It Va hw.acpi.disable_on_reboot
Disable ACPI during the reboot process.
Most systems reboot fine with ACPI still enabled, but some require
@ -374,6 +377,14 @@ typically as a child of a PCI bus.
.Pq Vt device
Supports an ACPI laptop lid switch, which typically puts a
system to sleep.
.It Li mwait
.Pq Vt feature
Do not ask firmware for available x86-vendor specific methods to enter
.Li Cx
sleep states.
Only query and use the generic I/O-based entrance method.
The knob is provided to work around inconsistencies in the tables
filled by firmware.
.It Li quirks
.Pq Vt feature
Do not honor quirks.

View File

@ -87,13 +87,6 @@ acpi_machdep_quirks(int *quirks)
return (0);
}
/*
 * Enter the C1 idle state: enable interrupts (STI) and halt the CPU
 * (HLT), both issued from a single asm statement.
 *
 * Takes no arguments; declared with an explicit (void) parameter list so
 * the definition is a real prototype and matches the header declaration.
 */
void
acpi_cpu_c1(void)
{
	__asm __volatile("sti; hlt");
}
/*
* Support for mapping ACPI tables during early boot. Currently this
* uses the crashdump map to map each table. However, the crashdump

View File

@ -91,6 +91,7 @@ struct dumperinfo;
void *alloc_fpusave(int flags);
void amd64_syscall(struct thread *td, int traced);
void busdma_swi(void);
bool cpu_mwait_usable(void);
void cpu_probe_amdc1e(void);
void cpu_setregs(void);
void doreti_iret(void) __asm(__STRING(doreti_iret));

View File

@ -47,6 +47,8 @@ __FBSDID("$FreeBSD$");
#include <machine/bus.h>
#if defined(__amd64__) || defined(__i386__)
#include <machine/clock.h>
#include <machine/specialreg.h>
#include <machine/md_var.h>
#endif
#include <sys/rman.h>
@ -70,6 +72,10 @@ struct acpi_cx {
uint32_t power; /* Power consumed (mW). */
int res_type; /* Resource type for p_lvlx. */
int res_rid; /* Resource ID for p_lvlx. */
bool do_mwait;
uint32_t mwait_hint;
bool mwait_hw_coord;
bool mwait_bm_avoidance;
};
#define MAX_CX_STATES 8
@ -128,6 +134,12 @@ struct acpi_cpu_device {
#define PIIX4_STOP_BREAK_MASK (PIIX4_BRLD_EN_IRQ0 | PIIX4_BRLD_EN_IRQ | PIIX4_BRLD_EN_IRQ8)
#define PIIX4_PCNTRL_BST_EN (1<<10)
#define CST_FFH_VENDOR_INTEL 1
#define CST_FFH_INTEL_CL_C1IO 1
#define CST_FFH_INTEL_CL_MWAIT 2
#define CST_FFH_MWAIT_HW_COORD 0x0001
#define CST_FFH_MWAIT_BM_AVOID 0x0002
/* Allow users to ignore processor orders in MADT. */
static int cpu_unordered;
SYSCTL_INT(_debug_acpi, OID_AUTO, cpu_unordered, CTLFLAG_RDTUN,
@ -179,6 +191,9 @@ static int acpi_cpu_usage_counters_sysctl(SYSCTL_HANDLER_ARGS);
static int acpi_cpu_set_cx_lowest(struct acpi_cpu_softc *sc);
static int acpi_cpu_cx_lowest_sysctl(SYSCTL_HANDLER_ARGS);
static int acpi_cpu_global_cx_lowest_sysctl(SYSCTL_HANDLER_ARGS);
#if defined(__i386__) || defined(__amd64__)
static int acpi_cpu_method_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static device_method_t acpi_cpu_methods[] = {
/* Device interface */
@ -348,7 +363,18 @@ acpi_cpu_attach(device_t dev)
* so advertise this ourselves. Note this is not the same as independent
* SMP control where each CPU can have different settings.
*/
sc->cpu_features = ACPI_CAP_SMP_SAME | ACPI_CAP_SMP_SAME_C3;
sc->cpu_features = ACPI_CAP_SMP_SAME | ACPI_CAP_SMP_SAME_C3 |
ACPI_CAP_C1_IO_HALT;
#if defined(__i386__) || defined(__amd64__)
/*
* Ask for MWAIT modes if not disabled and interrupts work
* reasonably with MWAIT.
*/
if (!acpi_disabled("mwait") && cpu_mwait_usable())
sc->cpu_features |= ACPI_CAP_SMP_C1_NATIVE | ACPI_CAP_SMP_C3_NATIVE;
#endif
if (devclass_get_drivers(acpi_cpu_devclass, &drivers, &drv_count) == 0) {
for (i = 0; i < drv_count; i++) {
if (ACPI_GET_FEATURES(drivers[i], &features) == 0)
@ -720,6 +746,27 @@ acpi_cpu_generic_cx_probe(struct acpi_cpu_softc *sc)
}
}
/*
 * Mark a Cx state as entered through MWAIT and fill in its MWAIT
 * parameters from the decoded register descriptor fields:
 *
 *   address - its low 32 bits become the MWAIT hint;
 *   accsize - bit field carrying the CST_FFH_MWAIT_HW_COORD and
 *             CST_FFH_MWAIT_BM_AVOID flags.
 */
static void
acpi_cpu_cx_cst_mwait(struct acpi_cx *cx_ptr, uint64_t address, int accsize)
{
    const int hw_coord_bit = accsize & CST_FFH_MWAIT_HW_COORD;
    const int bm_avoid_bit = accsize & CST_FFH_MWAIT_BM_AVOID;

    cx_ptr->do_mwait = true;
    /* Only the low 32 bits of the address are kept as the hint. */
    cx_ptr->mwait_hint = address & 0xffffffff;
    cx_ptr->mwait_hw_coord = (hw_coord_bit != 0);
    cx_ptr->mwait_bm_avoidance = (bm_avoid_bit != 0);
}
/*
 * Release the bus resource recorded in cx_ptr->p_lvlx, if there is one,
 * and clear the pointer so that the state no longer refers to it.
 * Does nothing when no resource is currently held.
 */
static void
acpi_cpu_cx_cst_free_plvlx(device_t cpu_dev, struct acpi_cx *cx_ptr)
{
    if (cx_ptr->p_lvlx != NULL) {
	bus_release_resource(cpu_dev, cx_ptr->res_type, cx_ptr->res_rid,
	    cx_ptr->p_lvlx);
	cx_ptr->p_lvlx = NULL;
    }
}
/*
* Parse a _CST package and set up its Cx states. Since the _CST object
* can change dynamically, our notify handler may call this function
@ -734,7 +781,8 @@ acpi_cpu_cx_cst(struct acpi_cpu_softc *sc)
ACPI_OBJECT *top;
ACPI_OBJECT *pkg;
uint32_t count;
int i;
uint64_t address;
int i, vendor, class, accsize;
ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
@ -790,6 +838,30 @@ acpi_cpu_cx_cst(struct acpi_cpu_softc *sc)
/* Validate the state to see if we should use it. */
switch (cx_ptr->type) {
case ACPI_STATE_C1:
acpi_cpu_cx_cst_free_plvlx(sc->cpu_dev, cx_ptr);
#if defined(__i386__) || defined(__amd64__)
if (acpi_PkgFFH_IntelCpu(pkg, 0, &vendor, &class, &address,
&accsize) == 0 && vendor == CST_FFH_VENDOR_INTEL) {
if (class == CST_FFH_INTEL_CL_C1IO) {
/* C1 I/O then Halt */
cx_ptr->res_rid = sc->cpu_cx_count;
bus_set_resource(sc->cpu_dev, SYS_RES_IOPORT,
cx_ptr->res_rid, address, 1);
cx_ptr->p_lvlx = bus_alloc_resource_any(sc->cpu_dev,
SYS_RES_IOPORT, &cx_ptr->res_rid, RF_ACTIVE |
RF_SHAREABLE);
if (cx_ptr->p_lvlx == NULL) {
bus_delete_resource(sc->cpu_dev, SYS_RES_IOPORT,
cx_ptr->res_rid);
device_printf(sc->cpu_dev,
"C1 I/O failed to allocate port %d, "
"degrading to C1 Halt", (int)address);
}
} else if (class == CST_FFH_INTEL_CL_MWAIT) {
acpi_cpu_cx_cst_mwait(cx_ptr, address, accsize);
}
}
#endif
if (sc->cpu_cx_states[0].type == ACPI_STATE_C0) {
/* This is the first C1 state. Use the reserved slot. */
sc->cpu_cx_states[0] = *cx_ptr;
@ -818,23 +890,34 @@ acpi_cpu_cx_cst(struct acpi_cpu_softc *sc)
}
/* Free up any previous register. */
if (cx_ptr->p_lvlx != NULL) {
bus_release_resource(sc->cpu_dev, cx_ptr->res_type, cx_ptr->res_rid,
cx_ptr->p_lvlx);
cx_ptr->p_lvlx = NULL;
}
acpi_cpu_cx_cst_free_plvlx(sc->cpu_dev, cx_ptr);
/* Allocate the control register for C2 or C3. */
cx_ptr->res_rid = sc->cpu_cx_count;
acpi_PkgGas(sc->cpu_dev, pkg, 0, &cx_ptr->res_type, &cx_ptr->res_rid,
&cx_ptr->p_lvlx, RF_SHAREABLE);
if (cx_ptr->p_lvlx) {
#if defined(__i386__) || defined(__amd64__)
if (acpi_PkgFFH_IntelCpu(pkg, 0, &vendor, &class, &address,
&accsize) == 0 && vendor == CST_FFH_VENDOR_INTEL &&
class == CST_FFH_INTEL_CL_MWAIT) {
/* Native C State Instruction use (mwait) */
acpi_cpu_cx_cst_mwait(cx_ptr, address, accsize);
ACPI_DEBUG_PRINT((ACPI_DB_INFO,
"acpi_cpu%d: Got C%d - %d latency\n",
device_get_unit(sc->cpu_dev), cx_ptr->type,
cx_ptr->trans_lat));
"acpi_cpu%d: Got C%d/mwait - %d latency\n",
device_get_unit(sc->cpu_dev), cx_ptr->type, cx_ptr->trans_lat));
cx_ptr++;
sc->cpu_cx_count++;
} else
#endif
{
cx_ptr->res_rid = sc->cpu_cx_count;
acpi_PkgGas(sc->cpu_dev, pkg, 0, &cx_ptr->res_type,
&cx_ptr->res_rid, &cx_ptr->p_lvlx, RF_SHAREABLE);
if (cx_ptr->p_lvlx) {
ACPI_DEBUG_PRINT((ACPI_DB_INFO,
"acpi_cpu%d: Got C%d - %d latency\n",
device_get_unit(sc->cpu_dev), cx_ptr->type,
cx_ptr->trans_lat));
cx_ptr++;
sc->cpu_cx_count++;
}
}
}
AcpiOsFree(buf.Pointer);
@ -956,6 +1039,13 @@ acpi_cpu_startup_cx(struct acpi_cpu_softc *sc)
OID_AUTO, "cx_usage_counters", CTLTYPE_STRING | CTLFLAG_RD,
(void *)sc, 0, acpi_cpu_usage_counters_sysctl, "A",
"Cx sleep state counters");
#if defined(__i386__) || defined(__amd64__)
SYSCTL_ADD_PROC(&sc->cpu_sysctl_ctx,
SYSCTL_CHILDREN(device_get_sysctl_tree(sc->cpu_dev)),
OID_AUTO, "cx_method", CTLTYPE_STRING | CTLFLAG_RD,
(void *)sc, 0, acpi_cpu_method_sysctl, "A",
"Cx entrance methods");
#endif
/* Signal platform that we can handle _CST notification. */
if (!cpu_cx_generic && cpu_cst_cnt != 0) {
@ -1043,7 +1133,14 @@ acpi_cpu_idle(sbintime_t sbt)
*/
if (cx_next->type == ACPI_STATE_C1) {
cputicks = cpu_ticks();
acpi_cpu_c1();
if (cx_next->p_lvlx != NULL) {
/* C1 I/O then Halt */
CPU_GET_REG(cx_next->p_lvlx, 1);
}
if (cx_next->do_mwait)
acpi_cpu_idle_mwait(cx_next->mwait_hint);
else
acpi_cpu_c1();
end_time = ((cpu_ticks() - cputicks) << 20) / cpu_tickrate();
if (curthread->td_critnest == 0)
end_time = min(end_time, 500000 / hz);
@ -1055,7 +1152,7 @@ acpi_cpu_idle(sbintime_t sbt)
* For C3, disable bus master arbitration and enable bus master wake
* if BM control is available, otherwise flush the CPU cache.
*/
if (cx_next->type == ACPI_STATE_C3) {
if (cx_next->type == ACPI_STATE_C3 || cx_next->mwait_bm_avoidance) {
if ((cpu_quirks & CPU_QUIRK_NO_BM_CTRL) == 0) {
AcpiWriteBitRegister(ACPI_BITREG_ARB_DISABLE, 1);
AcpiWriteBitRegister(ACPI_BITREG_BUS_MASTER_RLD, 1);
@ -1076,7 +1173,10 @@ acpi_cpu_idle(sbintime_t sbt)
start_time = 0;
cputicks = cpu_ticks();
}
CPU_GET_REG(cx_next->p_lvlx, 1);
if (cx_next->do_mwait)
acpi_cpu_idle_mwait(cx_next->mwait_hint);
else
CPU_GET_REG(cx_next->p_lvlx, 1);
/*
* Read the end time twice. Since it may take an arbitrary time
@ -1092,8 +1192,8 @@ acpi_cpu_idle(sbintime_t sbt)
end_time = ((cpu_ticks() - cputicks) << 20) / cpu_tickrate();
/* Enable bus master arbitration and disable bus master wakeup. */
if (cx_next->type == ACPI_STATE_C3 &&
(cpu_quirks & CPU_QUIRK_NO_BM_CTRL) == 0) {
if ((cx_next->type == ACPI_STATE_C3 || cx_next->mwait_bm_avoidance) &&
(cpu_quirks & CPU_QUIRK_NO_BM_CTRL) == 0) {
AcpiWriteBitRegister(ACPI_BITREG_ARB_DISABLE, 0);
AcpiWriteBitRegister(ACPI_BITREG_BUS_MASTER_RLD, 0);
}
@ -1286,6 +1386,44 @@ acpi_cpu_usage_counters_sysctl(SYSCTL_HANDLER_ARGS)
return (0);
}
#if defined(__i386__) || defined(__amd64__)
static int
acpi_cpu_method_sysctl(SYSCTL_HANDLER_ARGS)
{
struct acpi_cpu_softc *sc;
struct acpi_cx *cx;
struct sbuf sb;
char buf[128];
int i;
sc = (struct acpi_cpu_softc *)arg1;
sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
for (i = 0; i < sc->cpu_cx_count; i++) {
cx = &sc->cpu_cx_states[i];
sbuf_printf(&sb, "C%d/", i + 1);
if (cx->do_mwait) {
sbuf_cat(&sb, "mwait");
if (cx->mwait_hw_coord)
sbuf_cat(&sb, "/hwc");
if (cx->mwait_bm_avoidance)
sbuf_cat(&sb, "/bma");
} else if (cx->type == ACPI_STATE_C1) {
sbuf_cat(&sb, "hlt");
} else {
sbuf_cat(&sb, "io");
}
if (cx->type == ACPI_STATE_C1 && cx->p_lvlx != NULL)
sbuf_cat(&sb, "/iohlt");
sbuf_putc(&sb, ' ');
}
sbuf_trim(&sb);
sbuf_finish(&sb);
sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
sbuf_delete(&sb);
return (0);
}
#endif
static int
acpi_cpu_set_cx_lowest(struct acpi_cpu_softc *sc)
{

View File

@ -120,6 +120,28 @@ acpi_PkgGas(device_t dev, ACPI_OBJECT *res, int idx, int *type, int *rid,
return (acpi_bus_alloc_gas(dev, type, rid, &gas, dst, flags));
}
/*
 * Decode element idx of package res as a fixed-hardware (FFH) register
 * descriptor.  The element must be a buffer large enough to hold an
 * ACPI_GENERIC_ADDRESS located 3 bytes past its start.
 *
 * On success the descriptor fields are returned as follows:
 *   *vendor  <- BitWidth
 *   *class   <- BitOffset
 *   *address <- Address
 *   *accsize <- AccessWidth
 *
 * Returns 0 on success, EINVAL if the element is not a suitable buffer,
 * and ERESTART if the descriptor's address space is not
 * ACPI_ADR_SPACE_FIXED_HARDWARE.  The output arguments are left
 * untouched on failure.
 */
int
acpi_PkgFFH_IntelCpu(ACPI_OBJECT *res, int idx, int *vendor, int *class,
    uint64_t *address, int *accsize)
{
    ACPI_GENERIC_ADDRESS ffh;
    ACPI_OBJECT *elem;

    elem = &res->Package.Elements[idx];
    if (elem == NULL)
	return (EINVAL);
    if (elem->Type != ACPI_TYPE_BUFFER)
	return (EINVAL);
    if (elem->Buffer.Length < sizeof(ACPI_GENERIC_ADDRESS) + 3)
	return (EINVAL);

    /* The address structure begins 3 bytes into the buffer. */
    memcpy(&ffh, elem->Buffer.Pointer + 3, sizeof(ffh));
    if (ffh.SpaceId != ACPI_ADR_SPACE_FIXED_HARDWARE)
	return (ERESTART);

    *vendor = ffh.BitWidth;
    *class = ffh.BitOffset;
    *address = ffh.Address;
    *accsize = ffh.AccessWidth;
    return (0);
}
ACPI_HANDLE
acpi_GetReference(ACPI_HANDLE scope, ACPI_OBJECT *obj)
{

View File

@ -467,6 +467,8 @@ int acpi_PkgInt32(ACPI_OBJECT *res, int idx, uint32_t *dst);
int acpi_PkgStr(ACPI_OBJECT *res, int idx, void *dst, size_t size);
int acpi_PkgGas(device_t dev, ACPI_OBJECT *res, int idx, int *type,
int *rid, struct resource **dst, u_int flags);
int acpi_PkgFFH_IntelCpu(ACPI_OBJECT *res, int idx, int *vendor,
int *class, uint64_t *address, int *accsize);
ACPI_HANDLE acpi_GetReference(ACPI_HANDLE scope, ACPI_OBJECT *obj);
/*

View File

@ -106,13 +106,6 @@ acpi_machdep_quirks(int *quirks)
return (0);
}
/*
 * Enter the C1 idle state: enable interrupts (STI) and halt the CPU
 * (HLT), both issued from a single asm statement.
 *
 * Takes no arguments; declared with an explicit (void) parameter list so
 * the definition is a real prototype and matches the header declaration.
 */
void
acpi_cpu_c1(void)
{
	__asm __volatile("sti; hlt");
}
/*
* Support for mapping ACPI tables during early boot. This abuses the
* crashdump map because the kernel cannot allocate KVA in

View File

@ -97,6 +97,7 @@ struct dumperinfo;
void *alloc_fpusave(int flags);
void bcopyb(const void *from, void *to, size_t len);
void busdma_swi(void);
bool cpu_mwait_usable(void);
void cpu_probe_amdc1e(void);
void cpu_setregs(void);
void cpu_switch_load_gs(void) __asm(__STRING(cpu_switch_load_gs));

View File

@ -74,6 +74,7 @@ enum intr_polarity;
void acpi_SetDefaultIntrModel(int model);
void acpi_cpu_c1(void);
void acpi_cpu_idle_mwait(uint32_t mwait_hint);
void *acpi_map_table(vm_paddr_t pa, const char *sig);
void acpi_unmap_table(void *table);
vm_paddr_t acpi_find_table(const char *sig);

View File

@ -90,6 +90,7 @@ __FBSDID("$FreeBSD$");
#ifdef SMP
#include <machine/smp.h>
#endif
#include <x86/acpica_machdep.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
@ -121,6 +122,27 @@ cpu_flush_dcache(void *ptr, size_t len)
/* Not applicable */
}
/*
 * Enter the C1 idle state: a single asm statement issues STI to enable
 * interrupts, immediately followed by HLT to halt the CPU.
 * NOTE(review): the two instructions are presumably kept adjacent so that
 * no interrupt can be taken between them -- confirm against the CPU
 * manual before ever splitting this statement.
 */
void
acpi_cpu_c1(void)
{
__asm __volatile("sti; hlt");
}
/*
 * Idle the current CPU using MONITOR/MWAIT: arm the monitor on this
 * CPU's per-CPU monitorbuf and then execute MWAIT with the supplied hint
 * and the MWAIT_INTRBREAK extension.
 *
 *   mwait_hint - hint value passed through to cpu_mwait().
 */
void
acpi_cpu_idle_mwait(uint32_t mwait_hint)
{
	int *monitor_addr = (int *)PCPU_PTR(monitorbuf);

	/*
	 * XXXKIB.  Software coordination mode should be supported,
	 * but all Intel CPUs provide hardware coordination.
	 */
	cpu_monitor(monitor_addr, 0, 0);
	cpu_mwait(MWAIT_INTRBREAK, mwait_hint);
}
/* Get current clock frequency for the given cpu id. */
int
cpu_est_clockrate(int cpu_id, uint64_t *rate)
@ -194,6 +216,15 @@ cpu_halt(void)
halt();
}
/*
 * Report whether MWAIT may be used for idling: the CPU must advertise
 * MONITOR/MWAIT (CPUID2_MON) and both the CPUID5_MON_MWAIT_EXT and
 * CPUID5_MWAIT_INTRBREAK bits must be set in cpu_mon_mwait_flags.
 */
bool
cpu_mwait_usable(void)
{

	if ((cpu_feature2 & CPUID2_MON) == 0)
		return (false);
	if ((cpu_mon_mwait_flags & CPUID5_MON_MWAIT_EXT) == 0)
		return (false);
	return ((cpu_mon_mwait_flags & CPUID5_MWAIT_INTRBREAK) != 0);
}
void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */
static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */
static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */
@ -220,7 +251,7 @@ cpu_idle_acpi(sbintime_t sbt)
else if (cpu_idle_hook)
cpu_idle_hook(sbt);
else
__asm __volatile("sti; hlt");
acpi_cpu_c1();
*state = STATE_RUNNING;
}
#endif /* !PC98 */
@ -253,7 +284,7 @@ cpu_idle_hlt(sbintime_t sbt)
if (sched_runnable())
enable_intr();
else
__asm __volatile("sti; hlt");
acpi_cpu_c1();
*state = STATE_RUNNING;
}