x86: Implement MWAIT support for stopping a CPU

IPI_STOP is used after panic or when ddb is entered manually.  MONITOR/
MWAIT allows CPUs that support the feature to sleep in a low power way
instead of spinning.  Something similar is already used at idle.

It is perhaps especially useful in oversubscribed VM environments, and is
safe to use even if the panic/ddb thread is not the BSP.  (Except in the
presence of MWAIT errata, which are detected automatically on platforms with
known wakeup problems.)

It can be tuned/sysctled with "machdep.stop_mwait", which defaults to 0
(off).  This commit also introduces the tunable
"machdep.mwait_cpustop_broken," which defaults to 0, unless the CPU has
known errata, but may be set to "1" in loader.conf to signal that mwait
wakeup is broken on CPUs FreeBSD does not yet know about.

Unfortunately, bhyve does not yet support the MONITOR extensions, so this
doesn't help FreeBSD guests running under bhyve hypervisors.

Submitted by:   Anton Rang <rang AT acm.org> (earlier version)
Reviewed by:	kib
Sponsored by:	Dell EMC Isilon
Differential Revision:	https://reviews.freebsd.org/D20135
This commit is contained in:
Conrad Meyer 2019-05-04 20:34:26 +00:00
parent ecaed009a9
commit 665919aaaf
6 changed files with 91 additions and 17 deletions

View File

@ -39,7 +39,8 @@
/*
 * Per-CPU MONITOR/MWAIT communication buffer, padded to exactly two
 * 64-byte cache lines so that each state word can be MONITORed without
 * false sharing from adjacent per-CPU data.
 *
 * The scraped diff interleaved the old and new padding declarations,
 * yielding two members named "padding" (a compile error); this is the
 * post-commit layout: two ints plus padding to 128 bytes total.
 */
struct monitorbuf {
	int idle_state;		/* Used by cpu_idle_mwait. */
	int stop_state;		/* Used by cpustop_handler. */
	char padding[128 - (2 * sizeof(int))];
};
_Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line");
@ -90,6 +91,9 @@ _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line");
#ifdef _KERNEL
#define MONITOR_STOPSTATE_RUNNING 0
#define MONITOR_STOPSTATE_STOPPED 1
#if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF)
/*

View File

@ -43,7 +43,8 @@
/*
 * Per-CPU MONITOR/MWAIT communication buffer, padded to exactly two
 * 64-byte cache lines so each state word can be MONITORed without false
 * sharing from neighboring per-CPU fields.
 *
 * The stripped diff left both the old one-int padding line and the new
 * two-int padding line in place, duplicating the "padding" member; this
 * is the intended post-commit definition.
 */
struct monitorbuf {
	int idle_state;		/* Used by cpu_idle_mwait. */
	int stop_state;		/* Used by cpustop_handler. */
	char padding[128 - (2 * sizeof(int))];
};
_Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line");
@ -90,6 +91,9 @@ _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line");
#ifdef _KERNEL
#define MONITOR_STOPSTATE_RUNNING 0
#define MONITOR_STOPSTATE_STOPPED 1
#if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF)
/*

View File

@ -351,42 +351,68 @@ generic_restart_cpus(cpuset_t map, u_int type)
#endif
volatile cpuset_t *cpus;
KASSERT(type == IPI_STOP || type == IPI_STOP_HARD
#if X86
|| type == IPI_SUSPEND
#endif
, ("%s: invalid stop type", __func__));
KASSERT(type == IPI_STOP || type == IPI_STOP_HARD
|| type == IPI_SUSPEND, ("%s: invalid stop type", __func__));
if (!smp_started)
return (0);
CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));
#if X86
if (type == IPI_SUSPEND)
cpus = &resuming_cpus;
else
#endif
cpus = &stopped_cpus;
/* signal other cpus to restart */
#if X86
if (type == IPI_SUSPEND)
CPU_COPY_STORE_REL(&map, &toresume_cpus);
else
#endif
CPU_COPY_STORE_REL(&map, &started_cpus);
#if X86
/*
* Wake up any CPUs stopped with MWAIT. From MI code we can't tell if
* MONITOR/MWAIT is enabled, but the potentially redundant writes are
* relatively inexpensive.
*/
if (type == IPI_STOP) {
struct monitorbuf *mb;
u_int id;
CPU_FOREACH(id) {
if (!CPU_ISSET(id, &map))
continue;
mb = &pcpu_find(id)->pc_monitorbuf;
atomic_store_int(&mb->stop_state,
MONITOR_STOPSTATE_RUNNING);
}
}
if (!nmi_is_broadcast || nmi_kdb_lock == 0) {
#endif
/* wait for each to clear its bit */
while (CPU_OVERLAP(cpus, &map))
cpu_spinwait();
}
#else /* !X86 */
KASSERT(type == IPI_STOP || type == IPI_STOP_HARD,
("%s: invalid stop type", __func__));
if (!smp_started)
return (0);
CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));
cpus = &stopped_cpus;
/* signal other cpus to restart */
CPU_COPY_STORE_REL(&map, &started_cpus);
/* wait for each to clear its bit */
while (CPU_OVERLAP(cpus, &map))
cpu_spinwait();
#if X86
}
#endif
return (1);
}

View File

@ -61,6 +61,11 @@ struct cpu_info {
};
extern struct cpu_info *cpu_info;
/*
* Set if MWAIT does not reliably wake when the MONITORed address is written.
*/
extern bool mwait_cpustop_broken;
#ifdef COUNT_IPIS
extern u_long *ipi_invltlb_counts[MAXCPU];
extern u_long *ipi_invlrng_counts[MAXCPU];

View File

@ -110,6 +110,13 @@ static u_int cpu_reset_proxyid;
static volatile u_int cpu_reset_proxy_active;
#endif
/*
* Automatically initialized per CPU errata in cpu_idle_tun below.
*/
bool mwait_cpustop_broken = false;
SYSCTL_BOOL(_machdep, OID_AUTO, mwait_cpustop_broken, CTLFLAG_RDTUN,
&mwait_cpustop_broken, 0,
"Can not reliably wake MONITOR/MWAIT cpus without interrupts");
/*
* Machine dependent boot() routine
@ -358,6 +365,7 @@ void
cpu_reset(void)
{
#ifdef SMP
struct monitorbuf *mb;
cpuset_t map;
u_int cnt;
@ -378,6 +386,9 @@ cpu_reset(void)
/* Restart CPU #0. */
CPU_SETOF(0, &started_cpus);
mb = &pcpu_find(0)->pc_monitorbuf;
atomic_store_int(&mb->stop_state,
MONITOR_STOPSTATE_RUNNING);
wmb();
cnt = 0;
@ -716,6 +727,7 @@ cpu_idle_tun(void *unused __unused)
/* Ryzen erratas 1057, 1109. */
cpu_idle_selector("hlt");
idle_mwait = 0;
mwait_cpustop_broken = true;
}
if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_id == 0x506c9) {
@ -727,6 +739,7 @@ cpu_idle_tun(void *unused __unused)
* sleep states.
*/
cpu_idle_apl31_workaround = 1;
mwait_cpustop_broken = true;
}
TUNABLE_INT_FETCH("machdep.idle_apl31", &cpu_idle_apl31_workaround);
}

View File

@ -161,6 +161,10 @@ struct cache_info {
unsigned int boot_address;
static bool stop_mwait = false;
SYSCTL_BOOL(_machdep, OID_AUTO, stop_mwait, CTLFLAG_RWTUN, &stop_mwait, 0,
"Use MONITOR/MWAIT when stopping CPU, if available");
#define MiB(v) (v ## ULL << 20)
void
@ -1390,23 +1394,41 @@ nmi_call_kdb_smp(u_int type, struct trapframe *frame)
}
/*
* Handle an IPI_STOP by saving our current context and spinning until we
* are resumed.
* Handle an IPI_STOP by saving our current context and spinning (or mwaiting,
* if available) until we are resumed.
*/
void
cpustop_handler(void)
{
struct monitorbuf *mb;
u_int cpu;
bool use_mwait;
cpu = PCPU_GET(cpuid);
savectx(&stoppcbs[cpu]);
use_mwait = (stop_mwait && (cpu_feature2 & CPUID2_MON) != 0 &&
!mwait_cpustop_broken);
if (use_mwait) {
mb = PCPU_PTR(monitorbuf);
atomic_store_int(&mb->stop_state,
MONITOR_STOPSTATE_STOPPED);
}
/* Indicate that we are stopped */
CPU_SET_ATOMIC(cpu, &stopped_cpus);
/* Wait for restart */
while (!CPU_ISSET(cpu, &started_cpus)) {
if (use_mwait) {
cpu_monitor(mb, 0, 0);
if (atomic_load_int(&mb->stop_state) ==
MONITOR_STOPSTATE_STOPPED)
cpu_mwait(0, MWAIT_C1);
continue;
}
ia32_pause();
/*