 - Replace the old smp cpu topology specification with a new, more flexible
   tree structure that encodes the level of cache sharing and other
   properties.
 - Provide several convenience functions for creating one and two level
   cpu trees as well as a default flat topology.  The system now always
   has some topology.
 - On i386 and amd64 create a separate level in the hierarchy for HTT
   and multi-core cpus.  This will allow the scheduler to intelligently
   load balance non-uniform cores.  Presently we don't detect what level
   of the cache hierarchy is shared at each level in the topology.
 - Add a mechanism, via the kern.smp.topology tunable, for testing common
   topologies that have more information than the MD code is able to
   provide.  This should be considered a debugging tool only and not a
   stable API.
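For example, booting with a line like the following in /boot/loader.conf
should select the canned "dual core with shared L2" layout (value 3 in the
smp_topo() switch added to kern_smp.c below); the other recognized values
appear in the same switch:

	kern.smp.topology=3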

Sponsored by:	Nokia
Commit:	81aa71755b (parent bc72ba9cca)
Author:	Jeff Roberson
Date:	2008-03-02 07:58:42 +00:00
13 changed files with 330 additions and 173 deletions


@@ -97,6 +97,10 @@ static struct {
{ "Sledgehammer", CPUCLASS_K8 }, /* CPU_SLEDGEHAMMER */
};
int cpu_cores;
int cpu_logical;
extern int pq_l2size;
extern int pq_l2nways;
@@ -360,11 +364,13 @@ printcpuinfo(void)
if ((regs[0] & 0x1f) != 0)
cmp = ((regs[0] >> 26) & 0x3f) + 1;
}
cpu_cores = cmp;
cpu_logical = htt / cmp;
if (cmp > 1)
printf("\n Cores per package: %d", cmp);
if ((htt / cmp) > 1)
printf("\n Logical CPUs per core: %d",
htt / cmp);
cpu_logical);
}
}
/* Avoid ugly blank lines: only print newline when we have to. */
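To make the arithmetic above concrete, here is a sketch with invented probe
values (not from any real CPU): a package reporting 4 logical CPUs total
(htt) across 2 cores (cmp) works out as follows.

	/* Hypothetical probe results: htt = 4, cmp = 2. */
	cpu_cores = cmp;		/* 2 cores per package */
	cpu_logical = htt / cmp;	/* 4 / 2 = 2 logical CPUs per core */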


@@ -83,12 +83,6 @@ extern int nkpt;
extern struct pcpu __pcpu[];
/*
* CPU topology map datastructures for HTT.
*/
static struct cpu_group mp_groups[MAXCPU];
static struct cpu_top mp_top;
/* AP uses this during bootstrap. Do not staticize. */
char *bootSTK;
static int bootAP;
@@ -182,40 +176,38 @@ mem_range_AP_init(void)
mem_range_softc.mr_op->initAP(&mem_range_softc);
}
void
mp_topology(void)
struct cpu_group *
cpu_topo(void)
{
struct cpu_group *group;
int apic_id;
int groups;
int cpu;
/* Build the smp_topology map. */
/* Nothing to do if there is no HTT support. */
if (hyperthreading_cpus <= 1)
return;
group = &mp_groups[0];
groups = 1;
for (cpu = 0, apic_id = 0; apic_id <= MAX_APIC_ID; apic_id++) {
if (!cpu_info[apic_id].cpu_present)
continue;
/*
* If the current group has members and we're not a logical
* cpu, create a new group.
*/
if (group->cg_count != 0 &&
(apic_id % hyperthreading_cpus) == 0) {
group++;
groups++;
}
group->cg_count++;
group->cg_mask |= 1 << cpu;
cpu++;
if (cpu_cores == 0)
cpu_cores = 1;
if (cpu_logical == 0)
cpu_logical = 1;
if (mp_ncpus % (cpu_cores * cpu_logical) != 0) {
printf("WARNING: Non-uniform processors.\n");
printf("WARNING: Using suboptimal topology.\n");
return (smp_topo_none());
}
mp_top.ct_count = groups;
mp_top.ct_group = mp_groups;
smp_topology = &mp_top;
/*
* No multi-core or hyper-threaded.
*/
if (cpu_logical * cpu_cores == 1)
return (smp_topo_none());
/*
* Only HTT no multi-core.
*/
if (cpu_logical > 1 && cpu_cores == 1)
return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
/*
* Only multi-core no HTT.
*/
if (cpu_cores > 1 && cpu_logical == 1)
return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0));
/*
* Both HTT and multi-core.
*/
return (smp_topo_2level(CG_SHARE_NONE, cpu_cores,
CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
}
/*
@@ -409,9 +401,6 @@ cpu_mp_start(void)
}
set_interrupt_apic_ids();
/* Last, setup the cpu topology now that we have probed CPUs */
mp_topology();
}
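To make the ladder in cpu_topo() above concrete, these are the shapes it
returns for a few hypothetical probe results (counts invented for
illustration):

	/* cpu_cores = 1, cpu_logical = 1: flat topology. */
	top = smp_topo_none();
	/* cpu_cores = 1, cpu_logical = 2: HTT only; threads share an L1. */
	top = smp_topo_1level(CG_SHARE_L1, 2, CG_FLAG_HTT);
	/* cpu_cores = 2, cpu_logical = 2: two cores, two HTT threads each. */
	top = smp_topo_2level(CG_SHARE_NONE, 2, CG_SHARE_L1, 2, CG_FLAG_HTT);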


@@ -36,6 +36,10 @@ extern int boot_cpu_id;
extern struct pcb stoppcbs[];
extern int cpu_apic_ids[];
/* global data in identcpu.c */
extern int cpu_cores;
extern int cpu_logical;
/* IPI handlers */
inthand_t
IDTVEC(invltlb), /* TLB shootdowns - global */
@@ -57,7 +61,6 @@ void ipi_self(u_int ipi);
void ipi_bitmap_handler(struct trapframe frame);
u_int mp_bootaddress(u_int);
int mp_grab_cpu_hlt(void);
void mp_topology(void);
void smp_cache_flush(void);
void smp_invlpg(vm_offset_t addr);
void smp_masked_invlpg(u_int mask, vm_offset_t addr);


@@ -142,6 +142,9 @@ static struct {
{ "Pentium 4", CPUCLASS_686 }, /* CPU_P4 */
};
int cpu_cores;
int cpu_logical;
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
int has_f00f_bug = 0; /* Initialized so that it can be patched. */
#endif
@@ -857,11 +860,13 @@ printcpuinfo(void)
if ((regs[0] & 0x1f) != 0)
cmp = ((regs[0] >> 26) & 0x3f) + 1;
}
cpu_cores = cmp;
cpu_logical = htt / cmp;
if (cmp > 1)
printf("\n Cores per package: %d", cmp);
if ((htt / cmp) > 1)
printf("\n Logical CPUs per core: %d",
htt / cmp);
cpu_logical);
}
} else if (strcmp(cpu_vendor, "CyrixInstead") == 0) {
printf(" DIR=0x%04x", cyrix_did);


@@ -135,12 +135,6 @@ extern int nkpt;
extern struct pcpu __pcpu[];
/*
* CPU topology map datastructures for HTT.
*/
static struct cpu_group mp_groups[MAXCPU];
static struct cpu_top mp_top;
/* AP uses this during bootstrap. Do not staticize. */
char *bootSTK;
static int bootAP;
@@ -238,40 +232,38 @@ mem_range_AP_init(void)
mem_range_softc.mr_op->initAP(&mem_range_softc);
}
void
mp_topology(void)
struct cpu_group *
cpu_topo(void)
{
struct cpu_group *group;
int apic_id;
int groups;
int cpu;
/* Build the smp_topology map. */
/* Nothing to do if there is no HTT support. */
if (hyperthreading_cpus <= 1)
return;
group = &mp_groups[0];
groups = 1;
for (cpu = 0, apic_id = 0; apic_id <= MAX_APIC_ID; apic_id++) {
if (!cpu_info[apic_id].cpu_present)
continue;
/*
* If the current group has members and we're not a logical
* cpu, create a new group.
*/
if (group->cg_count != 0 &&
(apic_id % hyperthreading_cpus) == 0) {
group++;
groups++;
}
group->cg_count++;
group->cg_mask |= 1 << cpu;
cpu++;
if (cpu_cores == 0)
cpu_cores = 1;
if (cpu_logical == 0)
cpu_logical = 1;
if (mp_ncpus % (cpu_cores * cpu_logical) != 0) {
printf("WARNING: Non-uniform processors.\n");
printf("WARNING: Using suboptimal topology.\n");
return (smp_topo_none());
}
mp_top.ct_count = groups;
mp_top.ct_group = mp_groups;
smp_topology = &mp_top;
/*
* No multi-core or hyper-threaded.
*/
if (cpu_logical * cpu_cores == 1)
return (smp_topo_none());
/*
* Only HTT no multi-core.
*/
if (cpu_logical > 1 && cpu_cores == 1)
return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
/*
* Only multi-core no HTT.
*/
if (cpu_cores > 1 && cpu_logical == 1)
return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0));
/*
* Both HTT and multi-core.
*/
return (smp_topo_2level(CG_SHARE_NONE, cpu_cores,
CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
}
@@ -459,9 +451,6 @@ cpu_mp_start(void)
}
set_interrupt_apic_ids();
/* Last, setup the cpu topology now that we have probed CPUs */
mp_topology();
}


@@ -45,6 +45,10 @@ extern u_long *ipi_rendezvous_counts[MAXCPU];
extern u_long *ipi_lazypmap_counts[MAXCPU];
#endif
/* global data in identcpu.c */
extern int cpu_cores;
extern int cpu_logical;
/* IPI handlers */
inthand_t
IDTVEC(invltlb), /* TLB shootdowns - global */
@@ -67,7 +71,6 @@ void ipi_self(u_int ipi);
void ipi_bitmap_handler(struct trapframe frame);
u_int mp_bootaddress(u_int);
int mp_grab_cpu_hlt(void);
void mp_topology(void);
void smp_cache_flush(void);
void smp_invlpg(vm_offset_t addr);
void smp_masked_invlpg(u_int mask, vm_offset_t addr);


@@ -84,6 +84,13 @@ volatile int ap_spin;
static void cpu_mp_unleash(void *);
struct cpu_group *
cpu_topo(void)
{
return (smp_topo_none());
}
void
ia64_ap_startup(void)
{


@@ -243,7 +243,6 @@ static int tryself = 1;
static int steal_htt = 1;
static int steal_idle = 1;
static int steal_thresh = 2;
static int topology = 0;
/*
* One thread queue per processor.
@@ -1211,43 +1210,6 @@ tdg_add(struct tdq_group *tdg, struct tdq *tdq)
TDQ_ID(tdq), TDG_ID(tdg), tdg->tdg_cpus, tdg->tdg_cpumask);
}
static void
sched_setup_topology(void)
{
struct tdq_group *tdg;
struct cpu_group *cg;
int balance_groups;
struct tdq *tdq;
int i;
int j;
topology = 1;
balance_groups = 0;
for (i = 0; i < smp_topology->ct_count; i++) {
cg = &smp_topology->ct_group[i];
tdg = &tdq_groups[i];
/*
* Initialize the group.
*/
tdg_setup(tdg);
/*
* Find all of the group members and add them.
*/
for (j = 0; j < MAXCPU; j++) {
if ((cg->cg_mask & (1 << j)) != 0) {
tdq = TDQ_CPU(j);
tdq_setup(tdq);
tdg_add(tdg, tdq);
}
}
if (tdg->tdg_cpus > 1)
balance_groups = 1;
}
tdg_maxid = smp_topology->ct_count - 1;
if (balance_groups)
sched_balance_groups();
}
static void
sched_setup_smp(void)
{
@@ -1271,25 +1233,6 @@ sched_setup_smp(void)
}
tdg_maxid = cpus - 1;
}
/*
* Fake a topology with one group containing all CPUs.
*/
static void
sched_fake_topo(void)
{
#ifdef SCHED_FAKE_TOPOLOGY
static struct cpu_top top;
static struct cpu_group group;
top.ct_count = 1;
top.ct_group = &group;
group.cg_mask = all_cpus;
group.cg_count = mp_ncpus;
group.cg_children = 0;
smp_topology = &top;
#endif
}
#endif
/*
@@ -1303,15 +1246,11 @@ sched_setup(void *dummy)
tdq = TDQ_SELF();
#ifdef SMP
sched_fake_topo();
/*
* Setup tdqs based on a topology configuration or vanilla SMP based
* on mp_maxid.
*/
if (smp_topology == NULL)
sched_setup_smp();
else
sched_setup_topology();
sched_setup_smp();
balance_tdq = tdq;
sched_balance();
#else
@@ -2692,8 +2631,6 @@ SYSCTL_INT(_kern_sched, OID_AUTO, steal_idle, CTLFLAG_RW, &steal_idle, 0,
"Attempts to steal work from other cores before idling");
SYSCTL_INT(_kern_sched, OID_AUTO, steal_thresh, CTLFLAG_RW, &steal_thresh, 0,
"Minimum load on remote cpu before we'll steal");
SYSCTL_INT(_kern_sched, OID_AUTO, topology, CTLFLAG_RD, &topology, 0,
"True when a topology has been specified by the MD code.");
#endif
/* ps compat. All cpu percentages from ULE are weighted. */


@@ -68,7 +68,6 @@ int mp_ncpus;
/* export this for libkvm consumers. */
int mp_maxcpus = MAXCPU;
struct cpu_top *smp_topology;
volatile int smp_started;
u_int mp_maxid;
@@ -90,6 +89,11 @@ int smp_cpus = 1; /* how many cpu's running */
SYSCTL_INT(_kern_smp, OID_AUTO, cpus, CTLFLAG_RD, &smp_cpus, 0,
"Number of CPUs online");
int smp_topology = 0; /* Which topology we're using. */
SYSCTL_INT(_kern_smp, OID_AUTO, topology, CTLFLAG_RD, &smp_topology, 0,
"Topology override setting; 0 is default provided by hardware.");
TUNABLE_INT("kern.smp.topology", &smp_topology);
#ifdef SMP
/* Enable forwarding of a signal to a process running on a different CPU */
static int forward_signal_enabled = 1;
@@ -385,22 +389,177 @@ smp_rendezvous(void (* setup_func)(void *),
/* release lock */
mtx_unlock_spin(&smp_ipi_mtx);
}
#else /* !SMP */
/*
* Provide dummy SMP support for UP kernels. Modules that need to use SMP
* APIs will still work using this dummy support.
*/
static void
mp_setvariables_for_up(void *dummy)
static struct cpu_group group[MAXCPU];
struct cpu_group *
smp_topo(void)
{
mp_ncpus = 1;
mp_maxid = PCPU_GET(cpuid);
all_cpus = PCPU_GET(cpumask);
KASSERT(PCPU_GET(cpuid) == 0, ("UP must have a CPU ID of zero"));
struct cpu_group *top;
/*
* Check for a fake topology request for debugging purposes.
*/
switch (smp_topology) {
case 1:
/* Dual core with no sharing. */
top = smp_topo_1level(CG_SHARE_NONE, 2, 0);
break;
case 3:
/* Dual core with shared L2. */
top = smp_topo_1level(CG_SHARE_L2, 2, 0);
break;
case 4:
/* quad core, shared l3 among each package, private l2. */
top = smp_topo_1level(CG_SHARE_L3, 4, 0);
break;
case 5:
/* quad core, 2 dualcore parts on each package share l2. */
top = smp_topo_2level(CG_SHARE_NONE, 2, CG_SHARE_L2, 2, 0);
break;
case 6:
/* Single-core 2xHTT */
top = smp_topo_1level(CG_SHARE_L1, 2, CG_FLAG_HTT);
break;
case 7:
/* quad core with a shared l3, 8 threads sharing L2. */
top = smp_topo_2level(CG_SHARE_L3, 4, CG_SHARE_L2, 8,
CG_FLAG_THREAD);
break;
default:
/* Default, ask the system what it wants. */
top = cpu_topo();
break;
}
/*
* Verify the returned topology.
*/
if (top->cg_count != mp_ncpus)
panic("Built bad topology at %p. CPU count %d != %d",
top, top->cg_count, mp_ncpus);
if (top->cg_mask != all_cpus)
panic("Built bad topology at %p. CPU mask 0x%X != 0x%X",
top, top->cg_mask, all_cpus);
return (top);
}
SYSINIT(cpu_mp_setvariables, SI_SUB_TUNABLES, SI_ORDER_FIRST,
mp_setvariables_for_up, NULL)
struct cpu_group *
smp_topo_none(void)
{
struct cpu_group *top;
top = &group[0];
top->cg_parent = NULL;
top->cg_child = NULL;
top->cg_mask = (1 << mp_ncpus) - 1;
top->cg_count = mp_ncpus;
top->cg_children = 0;
top->cg_level = CG_SHARE_NONE;
top->cg_flags = 0;
return (top);
}
static int
smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share,
int count, int flags, int start)
{
cpumask_t mask;
int i;
for (mask = 0, i = 0; i < count; i++, start++)
mask |= (1 << start);
child->cg_parent = parent;
child->cg_child = NULL;
child->cg_children = 0;
child->cg_level = share;
child->cg_count = count;
child->cg_flags = flags;
child->cg_mask = mask;
parent->cg_children++;
for (; parent != NULL; parent = parent->cg_parent) {
if ((parent->cg_mask & child->cg_mask) != 0)
panic("Duplicate children in %p. mask 0x%X child 0x%X",
parent, parent->cg_mask, child->cg_mask);
parent->cg_mask |= child->cg_mask;
parent->cg_count += child->cg_count;
}
return (start);
}
struct cpu_group *
smp_topo_1level(int share, int count, int flags)
{
struct cpu_group *child;
struct cpu_group *top;
int packages;
int cpu;
int i;
cpu = 0;
top = &group[0];
packages = mp_ncpus / count;
top->cg_child = child = &group[1];
top->cg_level = CG_SHARE_NONE;
for (i = 0; i < packages; i++, child++)
cpu = smp_topo_addleaf(top, child, share, count, flags, cpu);
return (top);
}
struct cpu_group *
smp_topo_2level(int l2share, int l2count, int l1share, int l1count,
int l1flags)
{
struct cpu_group *top;
struct cpu_group *l1g;
struct cpu_group *l2g;
int cpu;
int i;
int j;
cpu = 0;
top = &group[0];
l2g = &group[1];
top->cg_child = l2g;
top->cg_level = CG_SHARE_NONE;
top->cg_children = mp_ncpus / (l2count * l1count);
l1g = l2g + top->cg_children;
for (i = 0; i < top->cg_children; i++, l2g++) {
l2g->cg_parent = top;
l2g->cg_child = l1g;
l2g->cg_level = l2share;
for (j = 0; j < l2count; j++, l1g++)
cpu = smp_topo_addleaf(l2g, l1g, l1share, l1count,
l1flags, cpu);
}
return (top);
}
struct cpu_group *
smp_topo_find(struct cpu_group *top, int cpu)
{
struct cpu_group *cg;
cpumask_t mask;
int children;
int i;
mask = (1 << cpu);
cg = top;
for (;;) {
if ((cg->cg_mask & mask) == 0)
return (NULL);
if (cg->cg_children == 0)
return (cg);
children = cg->cg_children;
for (i = 0, cg = cg->cg_child; i < children; cg++, i++)
if ((cg->cg_mask & mask) != 0)
break;
}
return (NULL);
}
#else /* !SMP */
void
smp_rendezvous(void (*setup_func)(void *),
@@ -416,4 +575,19 @@ smp_rendezvous(void (*setup_func)(void *),
if (teardown_func != NULL)
teardown_func(arg);
}
/*
* Provide dummy SMP support for UP kernels. Modules that need to use SMP
* APIs will still work using this dummy support.
*/
static void
mp_setvariables_for_up(void *dummy)
{
mp_ncpus = 1;
mp_maxid = PCPU_GET(cpuid);
all_cpus = PCPU_GET(cpumask);
KASSERT(PCPU_GET(cpuid) == 0, ("UP must have a CPU ID of zero"));
}
SYSINIT(cpu_mp_setvariables, SI_SUB_TUNABLES, SI_ORDER_FIRST,
mp_setvariables_for_up, NULL)
#endif /* SMP */
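As a usage sketch for the helpers added above (illustrative only, not part
of this commit), a consumer such as a scheduler could locate the leaf group
for a cpu with smp_topo_find() and test whether it has HTT siblings; the
helper name here is hypothetical:

	static int
	cpu_has_htt_siblings(struct cpu_group *top, int cpu)
	{
		struct cpu_group *leaf;

		leaf = smp_topo_find(top, cpu);
		if (leaf == NULL)
			return (0);
		return ((leaf->cg_flags & CG_FLAG_HTT) != 0 &&
		    leaf->cg_count > 1);
	}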


@@ -45,6 +45,13 @@
int boot_cpu_id;
struct cpu_group *
cpu_topo(void)
{
return (smp_topo_none());
}
void
cpu_mp_setmaxid(void)
{


@@ -189,6 +189,13 @@ cpu_mp_probe(void)
return (mp_maxid > 0);
}
struct cpu_group *
cpu_topo(void)
{
return (smp_topo_none());
}
static void
sun4u_startcpu(phandle_t cpu, void *func, u_long arg)
{


@@ -241,6 +241,13 @@ cpu_mp_probe(void)
return (mp_maxid > 0);
}
struct cpu_group *
cpu_topo(void)
{
return (smp_topo_none());
}
static int
start_ap_bycpuid(int cpuid, void *func, u_long arg)
{


@@ -32,18 +32,40 @@
*/
struct cpu_group {
cpumask_t cg_mask; /* Mask of cpus in this group. */
int cg_count; /* Count of cpus in this group. */
int cg_children; /* Number of children groups. */
struct cpu_group *cg_child; /* Optional child group. */
struct cpu_group *cg_parent; /* Our parent group. */
struct cpu_group *cg_child; /* Optional children groups. */
cpumask_t cg_mask; /* Mask of cpus in this group. */
int8_t cg_count; /* Count of cpus in this group. */
int8_t cg_children; /* Number of children groups. */
int8_t cg_level; /* Shared cache level. */
int8_t cg_flags; /* Traversal modifiers. */
};
struct cpu_top {
int ct_count; /* Count of groups. */
struct cpu_group *ct_group; /* Array of pointers to cpu groups. */
};
/*
* Defines common resources for CPUs in the group. The highest level
* resource should be used when multiple are shared.
*/
#define CG_SHARE_NONE 0
#define CG_SHARE_L1 1
#define CG_SHARE_L2 2
#define CG_SHARE_L3 3
/*
* Behavior modifiers for load balancing and affinity.
*/
#define CG_FLAG_HTT 0x01 /* Schedule the alternate core last. */
#define CG_FLAG_THREAD 0x02 /* New age htt, less crippled. */
/*
* Convenience routines for building topologies.
*/
struct cpu_group *smp_topo(void);
struct cpu_group *smp_topo_none(void);
struct cpu_group *smp_topo_1level(int l1share, int l1count, int l1flags);
struct cpu_group *smp_topo_2level(int l2share, int l2count, int l1share,
int l1count, int l1flags);
struct cpu_group *smp_topo_find(struct cpu_group *top, int cpu);
extern struct cpu_top *smp_topology;
extern void (*cpustop_restartfunc)(void);
extern int smp_active;
extern int smp_cpus;
@@ -90,6 +112,7 @@ extern cpumask_t all_cpus;
*/
struct thread;
struct cpu_group *cpu_topo(void);
void cpu_mp_announce(void);
int cpu_mp_probe(void);
void cpu_mp_setmaxid(void);
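As a final sketch (hypothetical, not part of this commit), the tree can be
walked recursively: children of a group are laid out as a contiguous array
starting at cg_child, as smp_topo_find() above relies on. A debugging
printer might look like the following, assuming cpumask_t prints with %x:

	static void
	smp_topo_print(struct cpu_group *cg, int indent)
	{
		int i;

		printf("%*slevel %d: %d cpus, mask 0x%x\n", indent, "",
		    cg->cg_level, cg->cg_count, cg->cg_mask);
		for (i = 0; i < cg->cg_children; i++)
			smp_topo_print(&cg->cg_child[i], indent + 2);
	}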