remove code for dynamic offlining/onlining of CPUs on x86

The code has definitely been broken for SCHED_ULE, which is the default
scheduler.  It may have been broken for SCHED_4BSD in more subtle ways,
e.g. with manually configured CPU affinities and for interrupt delivery
purposes.
We still provide a way to disable individual CPUs or all hyperthreading
"twin" CPUs before SMP startup.  See the UPDATING entry for details.

Interaction between building the CPU topology and disabling CPUs remains
fuzzy: the topology is first built using all available CPUs and the
disabled CPUs are then supposed to be "subtracted" from it.  That doesn't
work well if the resulting topology becomes non-uniform.
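For reference, the topology that SCHED_ULE actually builds can be
inspected at run time on a SCHED_ULE kernel; the sysctl prints an XML
description of the CPU groups:

	sysctl kern.sched.topology_spec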

This work was done in cooperation with Attilio Rao, who in addition to
reviewing also provided parts of the code.

PR:		kern/145385
Discussed with:	gcooper, ambrisko, mdf, sbruno
Reviewed by:	attilio
Tested by:	pho, pluknet
X-MFC after:	never
Andriy Gapon 2011-06-08 08:12:15 +00:00
parent c8c215e682
commit 234dab4a82
8 changed files with 41 additions and 341 deletions

UPDATING

@@ -22,6 +22,23 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 9.x IS SLOW:
machines to maximize performance. (To disable malloc debugging, run
ln -s aj /etc/malloc.conf.)
20110608:
The following sysctls and tunables are retired on x86 platforms:
machdep.hlt_cpus
machdep.hlt_logical_cpus
The following sysctl is retired:
machdep.hyperthreading_allowed
The sysctls were supposed to provide a way to dynamically offline and
online selected CPUs on x86 platforms, but the implementation has not
been reliable, especially with the SCHED_ULE scheduler.
The machdep.hyperthreading_allowed tunable is still available to ignore
hyperthreading CPUs at the OS level.
Individual CPUs can be disabled using the hint.lapic.X.disabled tunable,
where X is the APIC ID of a CPU. Be advised, though, that disabling
CPUs in a non-uniform fashion will result in a non-uniform topology and
may lead to sub-optimal system performance with SCHED_ULE, which is
the default scheduler.
20110607:
cpumask_t type is retired and cpuset_t is used in order to describe
a mask of CPUs.
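For readers tracking the cpumask_t to cpuset_t change, a minimal userland
sketch of the new type follows (assumes FreeBSD 7.1 or later; the CPU_*
macros are the same family used by the kernel code in this diff):

	#include <sys/param.h>
	#include <sys/cpuset.h>
	#include <stdio.h>

	int
	main(void)
	{
		cpuset_t mask;
		int i;

		CPU_ZERO(&mask);
		/* Fetch the affinity mask of the current process (id -1). */
		if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1,
		    sizeof(mask), &mask) != 0) {
			perror("cpuset_getaffinity");
			return (1);
		}
		for (i = 0; i < CPU_SETSIZE; i++)
			if (CPU_ISSET(i, &mask))
				printf("may run on CPU %d\n", i);
		return (0);
	}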

sys/amd64/amd64/machdep.c

@@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
#include "opt_isa.h"
#include "opt_kstack_pages.h"
#include "opt_maxmem.h"
#include "opt_mp_watchdog.h"
#include "opt_perfmon.h"
#include "opt_sched.h"
#include "opt_kdtrace.h"
@@ -116,6 +117,7 @@ __FBSDID("$FreeBSD$");
#include <x86/mca.h>
#include <machine/md_var.h>
#include <machine/metadata.h>
#include <machine/mp_watchdog.h>
#include <machine/pc/bios.h>
#include <machine/pcb.h>
#include <machine/proc.h>
@@ -734,9 +736,8 @@ cpu_idle(int busy)
CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
busy, curcpu);
#ifdef SMP
if (mp_grab_cpu_hlt())
return;
#ifdef MP_WATCHDOG
ap_watchdog(PCPU_GET(cpuid));
#endif
/* If we are busy - try to use fast methods. */
if (busy) {

sys/amd64/amd64/mp_machdep.c

@@ -29,7 +29,6 @@ __FBSDID("$FreeBSD$");
#include "opt_cpu.h"
#include "opt_kstack_pages.h"
#include "opt_mp_watchdog.h"
#include "opt_sched.h"
#include "opt_smp.h"
@@ -64,7 +63,6 @@ __FBSDID("$FreeBSD$");
#include <machine/cpufunc.h>
#include <x86/mca.h>
#include <machine/md_var.h>
#include <machine/mp_watchdog.h>
#include <machine/pcb.h>
#include <machine/psl.h>
#include <machine/smp.h>
@@ -160,11 +158,8 @@ static int start_all_aps(void);
static int start_ap(int apic_id);
static void release_aps(void *dummy);
static int hlt_logical_cpus;
static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */
static cpuset_t hyperthreading_cpus_mask;
static int hyperthreading_allowed = 1;
static struct sysctl_ctx_list logical_cpu_clist;
static u_int bootMP_size;
static void
@@ -748,11 +743,6 @@ init_secondary(void)
if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0)
CPU_OR(&logical_cpus_mask, &tcpuset);
/* Determine if we are a hyperthread. */
if (hyperthreading_cpus > 1 &&
PCPU_GET(apic_id) % hyperthreading_cpus != 0)
CPU_OR(&hyperthreading_cpus_mask, &tcpuset);
/* Build our map of 'other' CPUs. */
tallcpus = all_cpus;
CPU_NAND(&tallcpus, &tcpuset);
@@ -843,7 +833,7 @@ assign_cpu_ids(void)
if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) {
cpu_info[i].cpu_hyperthread = 1;
#if defined(SCHED_ULE)
/*
* Don't use HT CPU if it has been disabled by a
* tunable.
@@ -852,7 +842,6 @@ assign_cpu_ids(void)
cpu_info[i].cpu_disabled = 1;
continue;
}
#endif
}
/* Don't use this CPU if it has been disabled by a tunable. */
@@ -862,6 +851,11 @@ assign_cpu_ids(void)
}
}
if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) {
hyperthreading_cpus = 0;
cpu_logical = 1;
}
/*
* Assign CPU IDs to local APIC IDs and disable any CPUs
* beyond MAXCPU. CPU 0 is always assigned to the BSP.
@@ -1487,159 +1481,6 @@ release_aps(void *dummy __unused)
}
SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
static int
sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
{
cpuset_t mask;
int error;
mask = hlt_cpus_mask;
error = sysctl_handle_opaque(oidp, &mask, sizeof(mask), req);
if (error || !req->newptr)
return (error);
if (!CPU_EMPTY(&logical_cpus_mask) &&
CPU_SUBSET(&mask, &logical_cpus_mask))
hlt_logical_cpus = 1;
else
hlt_logical_cpus = 0;
if (! hyperthreading_allowed)
CPU_OR(&mask, &hyperthreading_cpus_mask);
if (CPU_SUBSET(&mask, &all_cpus))
CPU_CLR(0, &mask);
hlt_cpus_mask = mask;
return (error);
}
SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus,
CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_hlt_cpus, "S",
"Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2.");
static int
sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
{
int disable, error;
disable = hlt_logical_cpus;
error = sysctl_handle_int(oidp, &disable, 0, req);
if (error || !req->newptr)
return (error);
if (disable)
CPU_OR(&hlt_cpus_mask, &logical_cpus_mask);
else
CPU_NAND(&hlt_cpus_mask, &logical_cpus_mask);
if (! hyperthreading_allowed)
CPU_OR(&hlt_cpus_mask, &hyperthreading_cpus_mask);
if (CPU_SUBSET(&hlt_cpus_mask, &all_cpus))
CPU_CLR(0, &hlt_cpus_mask);
hlt_logical_cpus = disable;
return (error);
}
static int
sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS)
{
int allowed, error;
allowed = hyperthreading_allowed;
error = sysctl_handle_int(oidp, &allowed, 0, req);
if (error || !req->newptr)
return (error);
#ifdef SCHED_ULE
/*
* SCHED_ULE doesn't allow enabling/disabling HT cores at
* run-time.
*/
if (allowed != hyperthreading_allowed)
return (ENOTSUP);
return (error);
#endif
if (allowed)
CPU_NAND(&hlt_cpus_mask, &hyperthreading_cpus_mask);
else
CPU_OR(&hlt_cpus_mask, &hyperthreading_cpus_mask);
if (!CPU_EMPTY(&logical_cpus_mask) &&
CPU_SUBSET(&hlt_cpus_mask, &logical_cpus_mask))
hlt_logical_cpus = 1;
else
hlt_logical_cpus = 0;
if (CPU_SUBSET(&hlt_cpus_mask, &all_cpus))
CPU_CLR(0, &hlt_cpus_mask);
hyperthreading_allowed = allowed;
return (error);
}
static void
cpu_hlt_setup(void *dummy __unused)
{
if (!CPU_EMPTY(&logical_cpus_mask)) {
TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
&hlt_logical_cpus);
sysctl_ctx_init(&logical_cpu_clist);
SYSCTL_ADD_PROC(&logical_cpu_clist,
SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
"hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
sysctl_hlt_logical_cpus, "IU", "");
SYSCTL_ADD_UINT(&logical_cpu_clist,
SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
"logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
&logical_cpus_mask, 0, "");
if (hlt_logical_cpus)
CPU_OR(&hlt_cpus_mask, &logical_cpus_mask);
/*
* If necessary for security purposes, force
* hyperthreading off, regardless of the value
* of hlt_logical_cpus.
*/
if (!CPU_EMPTY(&hyperthreading_cpus_mask)) {
SYSCTL_ADD_PROC(&logical_cpu_clist,
SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
"hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW,
0, 0, sysctl_hyperthreading_allowed, "IU", "");
if (! hyperthreading_allowed)
CPU_OR(&hlt_cpus_mask,
&hyperthreading_cpus_mask);
}
}
}
SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
int
mp_grab_cpu_hlt(void)
{
cpuset_t mask;
#ifdef MP_WATCHDOG
u_int cpuid;
#endif
int retval;
mask = PCPU_GET(cpumask);
#ifdef MP_WATCHDOG
cpuid = PCPU_GET(cpuid);
ap_watchdog(cpuid);
#endif
retval = 0;
while (CPU_OVERLAP(&mask, &hlt_cpus_mask)) {
retval = 1;
__asm __volatile("sti; hlt" : : : "memory");
}
return (retval);
}
#ifdef COUNT_IPIS
/*
* Setup interrupt counters for IPI handlers.

sys/amd64/include/smp.h

@@ -65,7 +65,6 @@ void ipi_cpu(int cpu, u_int ipi);
int ipi_nmi_handler(void);
void ipi_selected(cpuset_t cpus, u_int ipi);
u_int mp_bootaddress(u_int);
int mp_grab_cpu_hlt(void);
void smp_cache_flush(void);
void smp_invlpg(vm_offset_t addr);
void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr);

sys/i386/i386/machdep.c

@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include "opt_isa.h"
#include "opt_kstack_pages.h"
#include "opt_maxmem.h"
#include "opt_mp_watchdog.h"
#include "opt_npx.h"
#include "opt_perfmon.h"
#include "opt_xbox.h"
@@ -118,6 +119,7 @@ __FBSDID("$FreeBSD$");
#include <x86/mca.h>
#include <machine/md_var.h>
#include <machine/metadata.h>
#include <machine/mp_watchdog.h>
#include <machine/pc/bios.h>
#include <machine/pcb.h>
#include <machine/pcb_ext.h>
@@ -1357,9 +1359,8 @@ cpu_idle(int busy)
CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
busy, curcpu);
#if defined(SMP) && !defined(XEN)
if (mp_grab_cpu_hlt())
return;
#if defined(MP_WATCHDOG) && !defined(XEN)
ap_watchdog(PCPU_GET(cpuid));
#endif
#ifndef XEN
/* If we are busy - try to use fast methods. */

sys/i386/i386/mp_machdep.c

@@ -29,7 +29,6 @@ __FBSDID("$FreeBSD$");
#include "opt_apic.h"
#include "opt_cpu.h"
#include "opt_kstack_pages.h"
#include "opt_mp_watchdog.h"
#include "opt_pmap.h"
#include "opt_sched.h"
#include "opt_smp.h"
@@ -78,7 +77,6 @@ __FBSDID("$FreeBSD$");
#include <machine/cputypes.h>
#include <x86/mca.h>
#include <machine/md_var.h>
#include <machine/mp_watchdog.h>
#include <machine/pcb.h>
#include <machine/psl.h>
#include <machine/smp.h>
@@ -209,11 +207,8 @@ static int start_all_aps(void);
static int start_ap(int apic_id);
static void release_aps(void *dummy);
static int hlt_logical_cpus;
static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */
static cpuset_t hyperthreading_cpus_mask;
static int hyperthreading_allowed = 1;
static struct sysctl_ctx_list logical_cpu_clist;
static void
mem_range_AP_init(void)
@@ -794,11 +789,6 @@ init_secondary(void)
/* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */
if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0)
CPU_OR(&logical_cpus_mask, &tcpuset);
/* Determine if we are a hyperthread. */
if (hyperthreading_cpus > 1 &&
PCPU_GET(apic_id) % hyperthreading_cpus != 0)
CPU_OR(&hyperthreading_cpus_mask, &tcpuset);
/* Build our map of 'other' CPUs. */
tallcpus = all_cpus;
@@ -882,7 +872,7 @@ assign_cpu_ids(void)
if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) {
cpu_info[i].cpu_hyperthread = 1;
#if defined(SCHED_ULE)
/*
* Don't use HT CPU if it has been disabled by a
* tunable.
@@ -891,7 +881,6 @@ assign_cpu_ids(void)
cpu_info[i].cpu_disabled = 1;
continue;
}
#endif
}
/* Don't use this CPU if it has been disabled by a tunable. */
@@ -901,6 +890,11 @@ assign_cpu_ids(void)
}
}
if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) {
hyperthreading_cpus = 0;
cpu_logical = 1;
}
/*
* Assign CPU IDs to local APIC IDs and disable any CPUs
* beyond MAXCPU. CPU 0 is always assigned to the BSP.
@@ -1550,159 +1544,6 @@ release_aps(void *dummy __unused)
}
SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
static int
sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
{
cpuset_t mask;
int error;
mask = hlt_cpus_mask;
error = sysctl_handle_opaque(oidp, &mask, sizeof(mask), req);
if (error || !req->newptr)
return (error);
if (!CPU_EMPTY(&logical_cpus_mask) &&
CPU_SUBSET(&mask, &logical_cpus_mask))
hlt_logical_cpus = 1;
else
hlt_logical_cpus = 0;
if (! hyperthreading_allowed)
CPU_OR(&mask, &hyperthreading_cpus_mask);
if (CPU_SUBSET(&mask, &all_cpus))
CPU_CLR(0, &mask);
hlt_cpus_mask = mask;
return (error);
}
SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus,
CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_hlt_cpus, "S",
"Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2.");
static int
sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
{
int disable, error;
disable = hlt_logical_cpus;
error = sysctl_handle_int(oidp, &disable, 0, req);
if (error || !req->newptr)
return (error);
if (disable)
CPU_OR(&hlt_cpus_mask, &logical_cpus_mask);
else
CPU_NAND(&hlt_cpus_mask, &logical_cpus_mask);
if (! hyperthreading_allowed)
CPU_OR(&hlt_cpus_mask, &hyperthreading_cpus_mask);
if (CPU_SUBSET(&hlt_cpus_mask, &all_cpus))
CPU_CLR(0, &hlt_cpus_mask);
hlt_logical_cpus = disable;
return (error);
}
static int
sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS)
{
int allowed, error;
allowed = hyperthreading_allowed;
error = sysctl_handle_int(oidp, &allowed, 0, req);
if (error || !req->newptr)
return (error);
#ifdef SCHED_ULE
/*
* SCHED_ULE doesn't allow enabling/disabling HT cores at
* run-time.
*/
if (allowed != hyperthreading_allowed)
return (ENOTSUP);
return (error);
#endif
if (allowed)
CPU_NAND(&hlt_cpus_mask, &hyperthreading_cpus_mask);
else
CPU_OR(&hlt_cpus_mask, &hyperthreading_cpus_mask);
if (!CPU_EMPTY(&logical_cpus_mask) &&
CPU_SUBSET(&hlt_cpus_mask, &logical_cpus_mask))
hlt_logical_cpus = 1;
else
hlt_logical_cpus = 0;
if (CPU_SUBSET(&hlt_cpus_mask, &all_cpus))
CPU_CLR(0, &hlt_cpus_mask);
hyperthreading_allowed = allowed;
return (error);
}
static void
cpu_hlt_setup(void *dummy __unused)
{
if (!CPU_EMPTY(&logical_cpus_mask)) {
TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
&hlt_logical_cpus);
sysctl_ctx_init(&logical_cpu_clist);
SYSCTL_ADD_PROC(&logical_cpu_clist,
SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
"hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
sysctl_hlt_logical_cpus, "IU", "");
SYSCTL_ADD_UINT(&logical_cpu_clist,
SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
"logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
&logical_cpus_mask, 0, "");
if (hlt_logical_cpus)
CPU_OR(&hlt_cpus_mask, &logical_cpus_mask);
/*
* If necessary for security purposes, force
* hyperthreading off, regardless of the value
* of hlt_logical_cpus.
*/
if (!CPU_EMPTY(&hyperthreading_cpus_mask)) {
SYSCTL_ADD_PROC(&logical_cpu_clist,
SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
"hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW,
0, 0, sysctl_hyperthreading_allowed, "IU", "");
if (! hyperthreading_allowed)
CPU_OR(&hlt_cpus_mask,
&hyperthreading_cpus_mask);
}
}
}
SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
int
mp_grab_cpu_hlt(void)
{
cpuset_t mask;
#ifdef MP_WATCHDOG
u_int cpuid;
#endif
int retval;
mask = PCPU_GET(cpumask);
#ifdef MP_WATCHDOG
cpuid = PCPU_GET(cpuid);
ap_watchdog(cpuid);
#endif
retval = 0;
while (CPU_OVERLAP(&mask, &hlt_cpus_mask)) {
retval = 1;
__asm __volatile("sti; hlt" : : : "memory");
}
return (retval);
}
#ifdef COUNT_IPIS
/*
* Setup interrupt counters for IPI handlers.

sys/i386/include/smp.h

@@ -68,7 +68,6 @@ void ipi_cpu(int cpu, u_int ipi);
int ipi_nmi_handler(void);
void ipi_selected(cpuset_t cpus, u_int ipi);
u_int mp_bootaddress(u_int);
int mp_grab_cpu_hlt(void);
void smp_cache_flush(void);
void smp_invlpg(vm_offset_t addr);
void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr);

sys/pc98/pc98/machdep.c

@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include "opt_isa.h"
#include "opt_kstack_pages.h"
#include "opt_maxmem.h"
#include "opt_mp_watchdog.h"
#include "opt_npx.h"
#include "opt_perfmon.h"
@@ -115,6 +116,7 @@ __FBSDID("$FreeBSD$");
#include <machine/intr_machdep.h>
#include <x86/mca.h>
#include <machine/md_var.h>
#include <machine/mp_watchdog.h>
#include <machine/pc/bios.h>
#include <machine/pcb.h>
#include <machine/pcb_ext.h>
@@ -1193,9 +1195,8 @@ cpu_idle(int busy)
CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
busy, curcpu);
#ifdef SMP
if (mp_grab_cpu_hlt())
return;
#ifdef MP_WATCHDOG
ap_watchdog(PCPU_GET(cpuid));
#endif
/* If we are busy - try to use fast methods. */
if (busy) {