x86: Implement MWAIT support for stopping a CPU
IPI_STOP is used after panic or when ddb is entered manually.  MONITOR/MWAIT
allows CPUs that support the feature to sleep in a low-power way instead of
spinning.  Something similar is already used at idle.

It is perhaps especially useful in oversubscribed VM environments, and is
safe to use even if the panic/ddb thread is not the BSP.  (Except in the
presence of MWAIT errata, which are detected automatically on platforms
with known wakeup problems.)

It can be tuned/sysctled with "machdep.stop_mwait," which defaults to 0
(off).  This commit also introduces the tunable
"machdep.mwait_cpustop_broken," which defaults to 0, unless the CPU has
known errata, but may be set to "1" in loader.conf to signal that mwait
wakeup is broken on CPUs FreeBSD does not yet know about.

Unfortunately, bhyve doesn't yet support MONITOR extensions, so this
doesn't help FreeBSD guests running under bhyve.

Submitted by:	Anton Rang <rang AT acm.org> (earlier version)
Reviewed by:	kib
Sponsored by:	Dell EMC Isilon
Differential Revision:	https://reviews.freebsd.org/D20135
commit 665919aaaf
parent ecaed009a9
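Not part of the commit, but for orientation: the sketch below models the stop/wake handshake the diff implements, as a self-contained C11 userland program. MONITOR/MWAIT are privileged instructions, so the kernel's cpu_monitor()/cpu_mwait() pair is stood in for by fake_mwait(), a yielding spin. struct monitorbuf, stop_state, and the MONITOR_STOPSTATE_* values mirror the diff; the thread scaffolding and fake_mwait() itself are illustrative assumptions only.

	/*
	 * Simulation of the IPI_STOP handshake: a "stopped CPU" parks on a
	 * monitored word and the "restarting CPU" wakes it by writing that
	 * word.  Build with: cc -std=c11 -pthread sim.c
	 */
	#include <pthread.h>
	#include <sched.h>
	#include <stdatomic.h>
	#include <stdio.h>

	#define	MONITOR_STOPSTATE_RUNNING	0
	#define	MONITOR_STOPSTATE_STOPPED	1

	struct monitorbuf {
		atomic_int stop_state;	/* The word MWAIT would monitor. */
	};

	static struct monitorbuf mb;
	static atomic_int started;	/* Stands in for started_cpus. */

	/* Stand-in for cpu_mwait(): sleep until the monitored word changes. */
	static void
	fake_mwait(void)
	{
		while (atomic_load(&mb.stop_state) == MONITOR_STOPSTATE_STOPPED)
			sched_yield();
	}

	/* Mirrors cpustop_handler() with use_mwait == true. */
	static void *
	stopped_cpu(void *arg)
	{
		(void)arg;
		atomic_store(&mb.stop_state, MONITOR_STOPSTATE_STOPPED);
		while (!atomic_load(&started)) {
			/* "cpu_monitor(mb, 0, 0)", then re-check before sleeping. */
			if (atomic_load(&mb.stop_state) == MONITOR_STOPSTATE_STOPPED)
				fake_mwait();
		}
		puts("stopped CPU resumed");
		return (NULL);
	}

	/* Mirrors the wake-up added to generic_restart_cpus(). */
	static void
	restart_cpu(void)
	{
		atomic_store(&started, 1);
		/* This write is what actually breaks the MWAIT sleep. */
		atomic_store(&mb.stop_state, MONITOR_STOPSTATE_RUNNING);
	}

	int
	main(void)
	{
		pthread_t t;

		pthread_create(&t, NULL, stopped_cpu, NULL);
		restart_cpu();
		pthread_join(&t, NULL);
		return (0);
	}

The re-check between "monitor" and "mwait" is the load-bearing detail: if the waker's store lands in that window, MWAIT is skipped rather than slept through. In the real feature the path is opt-in: set machdep.stop_mwait=1 via sysctl(8) or loader.conf(5); machdep.mwait_cpustop_broken=1 may be set in loader.conf to veto MWAIT on CPUs with wakeup errata FreeBSD does not yet know about.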
@@ -39,7 +39,8 @@
 struct monitorbuf {
 	int idle_state;		/* Used by cpu_idle_mwait. */
-	char padding[128 - (1 * sizeof(int))];
+	int stop_state;		/* Used by cpustop_handler. */
+	char padding[128 - (2 * sizeof(int))];
 };
 _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line");
 
@@ -90,6 +91,9 @@ _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line");
 #ifdef _KERNEL
 
+#define	MONITOR_STOPSTATE_RUNNING	0
+#define	MONITOR_STOPSTATE_STOPPED	1
+
 #if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF)
 
 /*
@@ -43,7 +43,8 @@
 struct monitorbuf {
 	int idle_state;		/* Used by cpu_idle_mwait. */
-	char padding[128 - (1 * sizeof(int))];
+	int stop_state;		/* Used by cpustop_handler. */
+	char padding[128 - (2 * sizeof(int))];
 };
 _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line");
 
@@ -90,6 +91,9 @@ _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line");
 #ifdef _KERNEL
 
+#define	MONITOR_STOPSTATE_RUNNING	0
+#define	MONITOR_STOPSTATE_STOPPED	1
+
 #if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF)
 
 /*
@@ -351,42 +351,68 @@ generic_restart_cpus(cpuset_t map, u_int type)
 #endif
 	volatile cpuset_t *cpus;
 
-	KASSERT(type == IPI_STOP || type == IPI_STOP_HARD
 #if X86
-	    || type == IPI_SUSPEND
-#endif
-	    , ("%s: invalid stop type", __func__));
+	KASSERT(type == IPI_STOP || type == IPI_STOP_HARD
+	    || type == IPI_SUSPEND, ("%s: invalid stop type", __func__));
 
 	if (!smp_started)
 		return (0);
 
 	CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));
 
-#if X86
 	if (type == IPI_SUSPEND)
 		cpus = &resuming_cpus;
 	else
-#endif
 		cpus = &stopped_cpus;
 
 	/* signal other cpus to restart */
-#if X86
 	if (type == IPI_SUSPEND)
 		CPU_COPY_STORE_REL(&map, &toresume_cpus);
 	else
-#endif
 		CPU_COPY_STORE_REL(&map, &started_cpus);
 
-#if X86
+	/*
+	 * Wake up any CPUs stopped with MWAIT.  From MI code we can't tell if
+	 * MONITOR/MWAIT is enabled, but the potentially redundant writes are
+	 * relatively inexpensive.
+	 */
+	if (type == IPI_STOP) {
+		struct monitorbuf *mb;
+		u_int id;
+
+		CPU_FOREACH(id) {
+			if (!CPU_ISSET(id, &map))
+				continue;
+
+			mb = &pcpu_find(id)->pc_monitorbuf;
+			atomic_store_int(&mb->stop_state,
+			    MONITOR_STOPSTATE_RUNNING);
+		}
+	}
+
 	if (!nmi_is_broadcast || nmi_kdb_lock == 0) {
-#endif
 		/* wait for each to clear its bit */
 		while (CPU_OVERLAP(cpus, &map))
 			cpu_spinwait();
-#if X86
 	}
+#else /* !X86 */
+	KASSERT(type == IPI_STOP || type == IPI_STOP_HARD,
+	    ("%s: invalid stop type", __func__));
+
+	if (!smp_started)
+		return (0);
+
+	CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));
+
+	cpus = &stopped_cpus;
+
+	/* signal other cpus to restart */
+	CPU_COPY_STORE_REL(&map, &started_cpus);
+
+	/* wait for each to clear its bit */
+	while (CPU_OVERLAP(cpus, &map))
+		cpu_spinwait();
 #endif
 
 	return (1);
 }
@@ -61,6 +61,11 @@ struct cpu_info {
 };
 extern struct cpu_info *cpu_info;
 
+/*
+ * Set if MWAIT does not reliably wake when the MONITORed address is written.
+ */
+extern bool mwait_cpustop_broken;
+
 #ifdef COUNT_IPIS
 extern u_long *ipi_invltlb_counts[MAXCPU];
 extern u_long *ipi_invlrng_counts[MAXCPU];
@@ -110,6 +110,13 @@ static u_int cpu_reset_proxyid;
 static volatile u_int cpu_reset_proxy_active;
 #endif
 
+/*
+ * Automatically initialized per CPU errata in cpu_idle_tun below.
+ */
+bool mwait_cpustop_broken = false;
+SYSCTL_BOOL(_machdep, OID_AUTO, mwait_cpustop_broken, CTLFLAG_RDTUN,
+    &mwait_cpustop_broken, 0,
+    "Can not reliably wake MONITOR/MWAIT cpus without interrupts");
+
 /*
  * Machine dependent boot() routine
@@ -358,6 +365,7 @@ void
 cpu_reset(void)
 {
 #ifdef SMP
+	struct monitorbuf *mb;
 	cpuset_t map;
 	u_int cnt;
 
@@ -378,6 +386,9 @@ cpu_reset(void)
 
 		/* Restart CPU #0. */
 		CPU_SETOF(0, &started_cpus);
+		mb = &pcpu_find(0)->pc_monitorbuf;
+		atomic_store_int(&mb->stop_state,
+		    MONITOR_STOPSTATE_RUNNING);
 		wmb();
 
 		cnt = 0;
@@ -716,6 +727,7 @@ cpu_idle_tun(void *unused __unused)
 		/* Ryzen erratas 1057, 1109. */
 		cpu_idle_selector("hlt");
 		idle_mwait = 0;
+		mwait_cpustop_broken = true;
 	}
 
 	if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_id == 0x506c9) {
@@ -727,6 +739,7 @@ cpu_idle_tun(void *unused __unused)
 		 * sleep states.
 		 */
 		cpu_idle_apl31_workaround = 1;
+		mwait_cpustop_broken = true;
 	}
 	TUNABLE_INT_FETCH("machdep.idle_apl31", &cpu_idle_apl31_workaround);
 }
@@ -161,6 +161,10 @@ struct cache_info {
 
 unsigned int boot_address;
 
+static bool stop_mwait = false;
+SYSCTL_BOOL(_machdep, OID_AUTO, stop_mwait, CTLFLAG_RWTUN, &stop_mwait, 0,
+    "Use MONITOR/MWAIT when stopping CPU, if available");
+
 #define	MiB(v)	(v ## ULL << 20)
 
 void
@@ -1390,23 +1394,41 @@ nmi_call_kdb_smp(u_int type, struct trapframe *frame)
 }
 
 /*
- * Handle an IPI_STOP by saving our current context and spinning until we
- * are resumed.
+ * Handle an IPI_STOP by saving our current context and spinning (or mwaiting,
+ * if available) until we are resumed.
  */
 void
 cpustop_handler(void)
 {
+	struct monitorbuf *mb;
 	u_int cpu;
+	bool use_mwait;
 
 	cpu = PCPU_GET(cpuid);
 
 	savectx(&stoppcbs[cpu]);
 
+	use_mwait = (stop_mwait && (cpu_feature2 & CPUID2_MON) != 0 &&
+	    !mwait_cpustop_broken);
+	if (use_mwait) {
+		mb = PCPU_PTR(monitorbuf);
+		atomic_store_int(&mb->stop_state,
+		    MONITOR_STOPSTATE_STOPPED);
+	}
+
 	/* Indicate that we are stopped */
 	CPU_SET_ATOMIC(cpu, &stopped_cpus);
 
 	/* Wait for restart */
-	while (!CPU_ISSET(cpu, &started_cpus))
+	while (!CPU_ISSET(cpu, &started_cpus)) {
+		if (use_mwait) {
+			cpu_monitor(mb, 0, 0);
+			if (atomic_load_int(&mb->stop_state) ==
+			    MONITOR_STOPSTATE_STOPPED)
+				cpu_mwait(0, MWAIT_C1);
+			continue;
+		}
+
 		ia32_pause();
 
 		/*