diff --git a/share/man/man4/acpi.4 b/share/man/man4/acpi.4 index 17cf1cbab65b..8c221be870aa 100644 --- a/share/man/man4/acpi.4 +++ b/share/man/man4/acpi.4 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd June 23, 2014 +.Dd May 9, 2015 .Dt ACPI 4 .Os .Sh NAME @@ -69,14 +69,12 @@ them (such as Enable dumping Debug objects without .Cd "options ACPI_DEBUG" . Default is 0, ignore Debug objects. -.It Va hw.acpi.acline -AC line state (1 means online, 0 means on battery power). -.It Va hw.acpi.cpu.cx_usage +.It Va dev.cpu.N.cx_usage Debugging information listing the percent of total usage for each sleep state. The values are reset when -.Va hw.acpi.cpu.cx_lowest +.Va dev.cpu.N.cx_lowest is modified. -.It Va hw.acpi.cpu.cx_lowest +.It Va dev.cpu.N.cx_lowest Lowest Cx state to use for idling the CPU. A scheduling algorithm will select states between .Li C1 @@ -111,6 +109,11 @@ semantics as the state. Deeper sleeps provide more power savings but increased transition latency when an interrupt occurs. +.It Va dev.cpu.N.cx_method +List of supported CPU idle states and their transition methods, as +directed by the firmware. +.It Va hw.acpi.acline +AC line state (1 means online, 0 means on battery power). .It Va hw.acpi.disable_on_reboot Disable ACPI during the reboot process. Most systems reboot fine with ACPI still enabled, but some require @@ -374,6 +377,14 @@ typically as a child of a PCI bus. .Pq Vt device Supports an ACPI laptop lid switch, which typically puts a system to sleep. +.It Li mwait +.Pq Vt feature +Do not ask firmware for available x86-vendor specific methods to enter +.Li Cx +sleep states. +Only query and use the generic I/O-based entrance method. +The knob is provided to work around inconsistencies in the tables +filled by firmware. .It Li quirks .Pq Vt feature Do not honor quirks. diff --git a/sys/amd64/acpica/acpi_machdep.c b/sys/amd64/acpica/acpi_machdep.c index 049b51bb4e9f..8f88a00e3275 100644 --- a/sys/amd64/acpica/acpi_machdep.c +++ b/sys/amd64/acpica/acpi_machdep.c @@ -87,13 +87,6 @@ acpi_machdep_quirks(int *quirks) return (0); } -void -acpi_cpu_c1() -{ - - __asm __volatile("sti; hlt"); -} - /* * Support for mapping ACPI tables during early boot. Currently this * uses the crashdump map to map each table. However, the crashdump diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h index 9083421642c7..0813e5fb74d3 100644 --- a/sys/amd64/include/md_var.h +++ b/sys/amd64/include/md_var.h @@ -91,6 +91,7 @@ struct dumperinfo; void *alloc_fpusave(int flags); void amd64_syscall(struct thread *td, int traced); void busdma_swi(void); +bool cpu_mwait_usable(void); void cpu_probe_amdc1e(void); void cpu_setregs(void); void doreti_iret(void) __asm(__STRING(doreti_iret)); diff --git a/sys/dev/acpica/acpi_cpu.c b/sys/dev/acpica/acpi_cpu.c index 8df27829d93e..a24ba4ea5dab 100644 --- a/sys/dev/acpica/acpi_cpu.c +++ b/sys/dev/acpica/acpi_cpu.c @@ -47,6 +47,8 @@ __FBSDID("$FreeBSD$"); #include #if defined(__amd64__) || defined(__i386__) #include +#include +#include #endif #include @@ -70,6 +72,10 @@ struct acpi_cx { uint32_t power; /* Power consumed (mW). */ int res_type; /* Resource type for p_lvlx. */ int res_rid; /* Resource ID for p_lvlx. */ + bool do_mwait; + uint32_t mwait_hint; + bool mwait_hw_coord; + bool mwait_bm_avoidance; }; #define MAX_CX_STATES 8 @@ -128,6 +134,12 @@ struct acpi_cpu_device { #define PIIX4_STOP_BREAK_MASK (PIIX4_BRLD_EN_IRQ0 | PIIX4_BRLD_EN_IRQ | PIIX4_BRLD_EN_IRQ8) #define PIIX4_PCNTRL_BST_EN (1<<10) +#define CST_FFH_VENDOR_INTEL 1 +#define CST_FFH_INTEL_CL_C1IO 1 +#define CST_FFH_INTEL_CL_MWAIT 2 +#define CST_FFH_MWAIT_HW_COORD 0x0001 +#define CST_FFH_MWAIT_BM_AVOID 0x0002 + /* Allow users to ignore processor orders in MADT. */ static int cpu_unordered; SYSCTL_INT(_debug_acpi, OID_AUTO, cpu_unordered, CTLFLAG_RDTUN, @@ -179,6 +191,9 @@ static int acpi_cpu_usage_counters_sysctl(SYSCTL_HANDLER_ARGS); static int acpi_cpu_set_cx_lowest(struct acpi_cpu_softc *sc); static int acpi_cpu_cx_lowest_sysctl(SYSCTL_HANDLER_ARGS); static int acpi_cpu_global_cx_lowest_sysctl(SYSCTL_HANDLER_ARGS); +#if defined(__i386__) || defined(__amd64__) +static int acpi_cpu_method_sysctl(SYSCTL_HANDLER_ARGS); +#endif static device_method_t acpi_cpu_methods[] = { /* Device interface */ @@ -348,7 +363,18 @@ acpi_cpu_attach(device_t dev) * so advertise this ourselves. Note this is not the same as independent * SMP control where each CPU can have different settings. */ - sc->cpu_features = ACPI_CAP_SMP_SAME | ACPI_CAP_SMP_SAME_C3; + sc->cpu_features = ACPI_CAP_SMP_SAME | ACPI_CAP_SMP_SAME_C3 | + ACPI_CAP_C1_IO_HALT; + +#if defined(__i386__) || defined(__amd64__) + /* + * Ask for MWAIT modes if not disabled and interrupts work + * reasonable with MWAIT. + */ + if (!acpi_disabled("mwait") && cpu_mwait_usable()) + sc->cpu_features |= ACPI_CAP_SMP_C1_NATIVE | ACPI_CAP_SMP_C3_NATIVE; +#endif + if (devclass_get_drivers(acpi_cpu_devclass, &drivers, &drv_count) == 0) { for (i = 0; i < drv_count; i++) { if (ACPI_GET_FEATURES(drivers[i], &features) == 0) @@ -720,6 +746,27 @@ acpi_cpu_generic_cx_probe(struct acpi_cpu_softc *sc) } } +static void +acpi_cpu_cx_cst_mwait(struct acpi_cx *cx_ptr, uint64_t address, int accsize) +{ + + cx_ptr->do_mwait = true; + cx_ptr->mwait_hint = address & 0xffffffff; + cx_ptr->mwait_hw_coord = (accsize & CST_FFH_MWAIT_HW_COORD) != 0; + cx_ptr->mwait_bm_avoidance = (accsize & CST_FFH_MWAIT_BM_AVOID) != 0; +} + +static void +acpi_cpu_cx_cst_free_plvlx(device_t cpu_dev, struct acpi_cx *cx_ptr) +{ + + if (cx_ptr->p_lvlx == NULL) + return; + bus_release_resource(cpu_dev, cx_ptr->res_type, cx_ptr->res_rid, + cx_ptr->p_lvlx); + cx_ptr->p_lvlx = NULL; +} + /* * Parse a _CST package and set up its Cx states. Since the _CST object * can change dynamically, our notify handler may call this function @@ -734,7 +781,8 @@ acpi_cpu_cx_cst(struct acpi_cpu_softc *sc) ACPI_OBJECT *top; ACPI_OBJECT *pkg; uint32_t count; - int i; + uint64_t address; + int i, vendor, class, accsize; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); @@ -790,6 +838,30 @@ acpi_cpu_cx_cst(struct acpi_cpu_softc *sc) /* Validate the state to see if we should use it. */ switch (cx_ptr->type) { case ACPI_STATE_C1: + acpi_cpu_cx_cst_free_plvlx(sc->cpu_dev, cx_ptr); +#if defined(__i386__) || defined(__amd64__) + if (acpi_PkgFFH_IntelCpu(pkg, 0, &vendor, &class, &address, + &accsize) == 0 && vendor == CST_FFH_VENDOR_INTEL) { + if (class == CST_FFH_INTEL_CL_C1IO) { + /* C1 I/O then Halt */ + cx_ptr->res_rid = sc->cpu_cx_count; + bus_set_resource(sc->cpu_dev, SYS_RES_IOPORT, + cx_ptr->res_rid, address, 1); + cx_ptr->p_lvlx = bus_alloc_resource_any(sc->cpu_dev, + SYS_RES_IOPORT, &cx_ptr->res_rid, RF_ACTIVE | + RF_SHAREABLE); + if (cx_ptr->p_lvlx == NULL) { + bus_delete_resource(sc->cpu_dev, SYS_RES_IOPORT, + cx_ptr->res_rid); + device_printf(sc->cpu_dev, + "C1 I/O failed to allocate port %d, " + "degrading to C1 Halt", (int)address); + } + } else if (class == CST_FFH_INTEL_CL_MWAIT) { + acpi_cpu_cx_cst_mwait(cx_ptr, address, accsize); + } + } +#endif if (sc->cpu_cx_states[0].type == ACPI_STATE_C0) { /* This is the first C1 state. Use the reserved slot. */ sc->cpu_cx_states[0] = *cx_ptr; @@ -818,23 +890,34 @@ acpi_cpu_cx_cst(struct acpi_cpu_softc *sc) } /* Free up any previous register. */ - if (cx_ptr->p_lvlx != NULL) { - bus_release_resource(sc->cpu_dev, cx_ptr->res_type, cx_ptr->res_rid, - cx_ptr->p_lvlx); - cx_ptr->p_lvlx = NULL; - } + acpi_cpu_cx_cst_free_plvlx(sc->cpu_dev, cx_ptr); /* Allocate the control register for C2 or C3. */ - cx_ptr->res_rid = sc->cpu_cx_count; - acpi_PkgGas(sc->cpu_dev, pkg, 0, &cx_ptr->res_type, &cx_ptr->res_rid, - &cx_ptr->p_lvlx, RF_SHAREABLE); - if (cx_ptr->p_lvlx) { +#if defined(__i386__) || defined(__amd64__) + if (acpi_PkgFFH_IntelCpu(pkg, 0, &vendor, &class, &address, + &accsize) == 0 && vendor == CST_FFH_VENDOR_INTEL && + class == CST_FFH_INTEL_CL_MWAIT) { + /* Native C State Instruction use (mwait) */ + acpi_cpu_cx_cst_mwait(cx_ptr, address, accsize); ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "acpi_cpu%d: Got C%d - %d latency\n", - device_get_unit(sc->cpu_dev), cx_ptr->type, - cx_ptr->trans_lat)); + "acpi_cpu%d: Got C%d/mwait - %d latency\n", + device_get_unit(sc->cpu_dev), cx_ptr->type, cx_ptr->trans_lat)); cx_ptr++; sc->cpu_cx_count++; + } else +#endif + { + cx_ptr->res_rid = sc->cpu_cx_count; + acpi_PkgGas(sc->cpu_dev, pkg, 0, &cx_ptr->res_type, + &cx_ptr->res_rid, &cx_ptr->p_lvlx, RF_SHAREABLE); + if (cx_ptr->p_lvlx) { + ACPI_DEBUG_PRINT((ACPI_DB_INFO, + "acpi_cpu%d: Got C%d - %d latency\n", + device_get_unit(sc->cpu_dev), cx_ptr->type, + cx_ptr->trans_lat)); + cx_ptr++; + sc->cpu_cx_count++; + } } } AcpiOsFree(buf.Pointer); @@ -956,6 +1039,13 @@ acpi_cpu_startup_cx(struct acpi_cpu_softc *sc) OID_AUTO, "cx_usage_counters", CTLTYPE_STRING | CTLFLAG_RD, (void *)sc, 0, acpi_cpu_usage_counters_sysctl, "A", "Cx sleep state counters"); +#if defined(__i386__) || defined(__amd64__) + SYSCTL_ADD_PROC(&sc->cpu_sysctl_ctx, + SYSCTL_CHILDREN(device_get_sysctl_tree(sc->cpu_dev)), + OID_AUTO, "cx_method", CTLTYPE_STRING | CTLFLAG_RD, + (void *)sc, 0, acpi_cpu_method_sysctl, "A", + "Cx entrance methods"); +#endif /* Signal platform that we can handle _CST notification. */ if (!cpu_cx_generic && cpu_cst_cnt != 0) { @@ -1043,7 +1133,14 @@ acpi_cpu_idle(sbintime_t sbt) */ if (cx_next->type == ACPI_STATE_C1) { cputicks = cpu_ticks(); - acpi_cpu_c1(); + if (cx_next->p_lvlx != NULL) { + /* C1 I/O then Halt */ + CPU_GET_REG(cx_next->p_lvlx, 1); + } + if (cx_next->do_mwait) + acpi_cpu_idle_mwait(cx_next->mwait_hint); + else + acpi_cpu_c1(); end_time = ((cpu_ticks() - cputicks) << 20) / cpu_tickrate(); if (curthread->td_critnest == 0) end_time = min(end_time, 500000 / hz); @@ -1055,7 +1152,7 @@ acpi_cpu_idle(sbintime_t sbt) * For C3, disable bus master arbitration and enable bus master wake * if BM control is available, otherwise flush the CPU cache. */ - if (cx_next->type == ACPI_STATE_C3) { + if (cx_next->type == ACPI_STATE_C3 || cx_next->mwait_bm_avoidance) { if ((cpu_quirks & CPU_QUIRK_NO_BM_CTRL) == 0) { AcpiWriteBitRegister(ACPI_BITREG_ARB_DISABLE, 1); AcpiWriteBitRegister(ACPI_BITREG_BUS_MASTER_RLD, 1); @@ -1076,7 +1173,10 @@ acpi_cpu_idle(sbintime_t sbt) start_time = 0; cputicks = cpu_ticks(); } - CPU_GET_REG(cx_next->p_lvlx, 1); + if (cx_next->do_mwait) + acpi_cpu_idle_mwait(cx_next->mwait_hint); + else + CPU_GET_REG(cx_next->p_lvlx, 1); /* * Read the end time twice. Since it may take an arbitrary time @@ -1092,8 +1192,8 @@ acpi_cpu_idle(sbintime_t sbt) end_time = ((cpu_ticks() - cputicks) << 20) / cpu_tickrate(); /* Enable bus master arbitration and disable bus master wakeup. */ - if (cx_next->type == ACPI_STATE_C3 && - (cpu_quirks & CPU_QUIRK_NO_BM_CTRL) == 0) { + if ((cx_next->type == ACPI_STATE_C3 || cx_next->mwait_bm_avoidance) && + (cpu_quirks & CPU_QUIRK_NO_BM_CTRL) == 0) { AcpiWriteBitRegister(ACPI_BITREG_ARB_DISABLE, 0); AcpiWriteBitRegister(ACPI_BITREG_BUS_MASTER_RLD, 0); } @@ -1286,6 +1386,44 @@ acpi_cpu_usage_counters_sysctl(SYSCTL_HANDLER_ARGS) return (0); } +#if defined(__i386__) || defined(__amd64__) +static int +acpi_cpu_method_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct acpi_cpu_softc *sc; + struct acpi_cx *cx; + struct sbuf sb; + char buf[128]; + int i; + + sc = (struct acpi_cpu_softc *)arg1; + sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); + for (i = 0; i < sc->cpu_cx_count; i++) { + cx = &sc->cpu_cx_states[i]; + sbuf_printf(&sb, "C%d/", i + 1); + if (cx->do_mwait) { + sbuf_cat(&sb, "mwait"); + if (cx->mwait_hw_coord) + sbuf_cat(&sb, "/hwc"); + if (cx->mwait_bm_avoidance) + sbuf_cat(&sb, "/bma"); + } else if (cx->type == ACPI_STATE_C1) { + sbuf_cat(&sb, "hlt"); + } else { + sbuf_cat(&sb, "io"); + } + if (cx->type == ACPI_STATE_C1 && cx->p_lvlx != NULL) + sbuf_cat(&sb, "/iohlt"); + sbuf_putc(&sb, ' '); + } + sbuf_trim(&sb); + sbuf_finish(&sb); + sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); + sbuf_delete(&sb); + return (0); +} +#endif + static int acpi_cpu_set_cx_lowest(struct acpi_cpu_softc *sc) { diff --git a/sys/dev/acpica/acpi_package.c b/sys/dev/acpica/acpi_package.c index e38fea55521c..c1070cb38deb 100644 --- a/sys/dev/acpica/acpi_package.c +++ b/sys/dev/acpica/acpi_package.c @@ -120,6 +120,28 @@ acpi_PkgGas(device_t dev, ACPI_OBJECT *res, int idx, int *type, int *rid, return (acpi_bus_alloc_gas(dev, type, rid, &gas, dst, flags)); } +int +acpi_PkgFFH_IntelCpu(ACPI_OBJECT *res, int idx, int *vendor, int *class, + uint64_t *address, int *accsize) +{ + ACPI_GENERIC_ADDRESS gas; + ACPI_OBJECT *obj; + + obj = &res->Package.Elements[idx]; + if (obj == NULL || obj->Type != ACPI_TYPE_BUFFER || + obj->Buffer.Length < sizeof(ACPI_GENERIC_ADDRESS) + 3) + return (EINVAL); + + memcpy(&gas, obj->Buffer.Pointer + 3, sizeof(gas)); + if (gas.SpaceId != ACPI_ADR_SPACE_FIXED_HARDWARE) + return (ERESTART); + *vendor = gas.BitWidth; + *class = gas.BitOffset; + *address = gas.Address; + *accsize = gas.AccessWidth; + return (0); +} + ACPI_HANDLE acpi_GetReference(ACPI_HANDLE scope, ACPI_OBJECT *obj) { diff --git a/sys/dev/acpica/acpivar.h b/sys/dev/acpica/acpivar.h index 2e2b96d7dcf4..cbd4bd97b9d8 100644 --- a/sys/dev/acpica/acpivar.h +++ b/sys/dev/acpica/acpivar.h @@ -467,6 +467,8 @@ int acpi_PkgInt32(ACPI_OBJECT *res, int idx, uint32_t *dst); int acpi_PkgStr(ACPI_OBJECT *res, int idx, void *dst, size_t size); int acpi_PkgGas(device_t dev, ACPI_OBJECT *res, int idx, int *type, int *rid, struct resource **dst, u_int flags); +int acpi_PkgFFH_IntelCpu(ACPI_OBJECT *res, int idx, int *vendor, + int *class, uint64_t *address, int *accsize); ACPI_HANDLE acpi_GetReference(ACPI_HANDLE scope, ACPI_OBJECT *obj); /* diff --git a/sys/i386/acpica/acpi_machdep.c b/sys/i386/acpica/acpi_machdep.c index 049354b0c213..4c79691dbeee 100644 --- a/sys/i386/acpica/acpi_machdep.c +++ b/sys/i386/acpica/acpi_machdep.c @@ -106,13 +106,6 @@ acpi_machdep_quirks(int *quirks) return (0); } -void -acpi_cpu_c1() -{ - - __asm __volatile("sti; hlt"); -} - /* * Support for mapping ACPI tables during early boot. This abuses the * crashdump map because the kernel cannot allocate KVA in diff --git a/sys/i386/include/md_var.h b/sys/i386/include/md_var.h index bffdd5751d66..b5bd35ef524d 100644 --- a/sys/i386/include/md_var.h +++ b/sys/i386/include/md_var.h @@ -97,6 +97,7 @@ struct dumperinfo; void *alloc_fpusave(int flags); void bcopyb(const void *from, void *to, size_t len); void busdma_swi(void); +bool cpu_mwait_usable(void); void cpu_probe_amdc1e(void); void cpu_setregs(void); void cpu_switch_load_gs(void) __asm(__STRING(cpu_switch_load_gs)); diff --git a/sys/x86/include/acpica_machdep.h b/sys/x86/include/acpica_machdep.h index 46080c0e8faf..136285caa699 100644 --- a/sys/x86/include/acpica_machdep.h +++ b/sys/x86/include/acpica_machdep.h @@ -74,6 +74,7 @@ enum intr_polarity; void acpi_SetDefaultIntrModel(int model); void acpi_cpu_c1(void); +void acpi_cpu_idle_mwait(uint32_t mwait_hint); void *acpi_map_table(vm_paddr_t pa, const char *sig); void acpi_unmap_table(void *table); vm_paddr_t acpi_find_table(const char *sig); diff --git a/sys/x86/x86/cpu_machdep.c b/sys/x86/x86/cpu_machdep.c index f8d1f083c244..7925713fb314 100644 --- a/sys/x86/x86/cpu_machdep.c +++ b/sys/x86/x86/cpu_machdep.c @@ -90,6 +90,7 @@ __FBSDID("$FreeBSD$"); #ifdef SMP #include #endif +#include #include #include @@ -121,6 +122,27 @@ cpu_flush_dcache(void *ptr, size_t len) /* Not applicable */ } +void +acpi_cpu_c1(void) +{ + + __asm __volatile("sti; hlt"); +} + +void +acpi_cpu_idle_mwait(uint32_t mwait_hint) +{ + int *state; + + state = (int *)PCPU_PTR(monitorbuf); + /* + * XXXKIB. Software coordination mode should be supported, + * but all Intel CPUs provide hardware coordination. + */ + cpu_monitor(state, 0, 0); + cpu_mwait(MWAIT_INTRBREAK, mwait_hint); +} + /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) @@ -194,6 +216,15 @@ cpu_halt(void) halt(); } +bool +cpu_mwait_usable(void) +{ + + return ((cpu_feature2 & CPUID2_MON) != 0 && ((cpu_mon_mwait_flags & + (CPUID5_MON_MWAIT_EXT | CPUID5_MWAIT_INTRBREAK)) == + (CPUID5_MON_MWAIT_EXT | CPUID5_MWAIT_INTRBREAK))); +} + void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */ static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */ static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */ @@ -220,7 +251,7 @@ cpu_idle_acpi(sbintime_t sbt) else if (cpu_idle_hook) cpu_idle_hook(sbt); else - __asm __volatile("sti; hlt"); + acpi_cpu_c1(); *state = STATE_RUNNING; } #endif /* !PC98 */ @@ -253,7 +284,7 @@ cpu_idle_hlt(sbintime_t sbt) if (sched_runnable()) enable_intr(); else - __asm __volatile("sti; hlt"); + acpi_cpu_c1(); *state = STATE_RUNNING; }