arm64: Print per-CPU cache summary

Summary:
It can be useful to see a summary of CPU caches on bootup.  This is done
for most platforms already, so add this to arm64, in the form of (taken
from an Apple M1 Pro test):

  L1 cache: 192KB (instruction), 128KB (data)
  L2 cache: 12288KB (unified)

This is printed out per-CPU, only under bootverbose.

Future refinements could instead determine whether a cache level is shared
with other cores (L2 is shared among cores on some SoCs, for instance)
and compute the true total cache sizes.  For instance, it's known that
the M1 Pro, on which this test was done, has two 12MB L2 clusters, for a
total of 24MB.  Seeing each CPU with 12288KB L2 would make one think that
there's 12MB * NCPUs, for possibly 120MB of cache, which is incorrect.

Sponsored by:	Juniper Networks, Inc.
Reviewed by:	#arm64, andrew
Differential Revision: https://reviews.freebsd.org/D35366
This commit is contained in:
Justin Hibbits 2022-05-31 10:40:20 -05:00
parent 45ef6b4b5d
commit 139ba152c9
2 changed files with 115 additions and 0 deletions

View File

@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
static void print_cpu_midr(struct sbuf *sb, u_int cpu);
static void print_cpu_features(u_int cpu);
static void print_cpu_caches(struct sbuf *sb, u_int);
#ifdef COMPAT_FREEBSD32
static u_long parse_cpu_features_hwcap32(void);
#endif
@ -103,6 +104,8 @@ static char cpu_model[64];
SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD,
cpu_model, sizeof(cpu_model), "Machine model");
#define MAX_CACHES 8 /* Maximum number of caches supported
architecturally. */
/*
* Per-CPU affinity as provided in MPIDR_EL1
* Indexed by CPU number in logical order selected by the system.
@ -135,6 +138,8 @@ struct cpu_desc {
uint64_t mvfr0;
uint64_t mvfr1;
#endif
uint64_t clidr;
uint32_t ccsidr[MAX_CACHES][2]; /* 2 possible types. */
};
static struct cpu_desc cpu_desc[MAXCPU];
@ -1805,6 +1810,7 @@ cpu_features_sysinit(void *dummy __unused)
/* Fill in cpu_model for the hw.model sysctl */
sbuf_new(&sb, cpu_model, sizeof(cpu_model), SBUF_FIXEDLEN);
print_cpu_midr(&sb, 0);
sbuf_finish(&sb);
sbuf_delete(&sb);
}
@ -1978,6 +1984,62 @@ print_cpu_midr(struct sbuf *sb, u_int cpu)
cpu_part_name, CPU_VAR(midr), CPU_REV(midr));
}
/*
 * Append the size and kind of a single cache to 'sb', formatted as
 * "<size>KB (<kind>)".
 *
 * cpu     - index into cpu_desc[]; only used to pick the CCSIDR layout.
 * sb      - sbuf that receives the text.
 * ccs     - CCSIDR value describing the cache.
 * icache  - true to label the cache "instruction".
 * unified - when icache is false, true labels the cache "unified",
 *           false labels it "data".
 */
static void
print_cpu_cache(u_int cpu, struct sbuf *sb, uint64_t ccs, bool icache,
    bool unified)
{
	const char *kind;
	size_t nsets, nways, line_bytes, total_bytes;

	if (icache)
		kind = "instruction";
	else if (unified)
		kind = "unified";
	else
		kind = "data";

	/*
	 * The set and way counts live in different bit ranges depending on
	 * the FEAT_CCIDX bit in the ID_AA64MMFR2 feature register.  Both
	 * fields are stored as (count - 1).
	 */
	if ((cpu_desc[cpu].id_aa64mmfr2 & ID_AA64MMFR2_CCIDX_64) != 0) {
		nsets = CCSIDR_NSETS_64(ccs) + 1;
		nways = CCSIDR_ASSOC_64(ccs) + 1;
	} else {
		nsets = CCSIDR_NSETS(ccs) + 1;
		nways = CCSIDR_ASSOC(ccs) + 1;
	}

	/* The LineSize field holds Log2(line size) - 4. */
	line_bytes = 1 << ((ccs & CCSIDR_LineSize_MASK) + 4);

	/* Cache size is sets * ways * line size. */
	total_bytes = nsets * nways;
	total_bytes *= line_bytes;

	sbuf_printf(sb, "%zuKB (%s)", total_bytes / 1024, kind);
}
/*
 * Print one summary line per cache level of the given CPU, e.g.
 * " L1 cache: 192KB (instruction), 128KB (data)".
 *
 * The cache-type fields are taken from the saved CLIDR value in
 * cpu_desc[cpu], 3 bits per level, starting with L1.  The walk stops at the
 * first level whose type field is zero.  For each level the saved CCSIDR
 * values in cpu_desc[cpu].ccsidr[level][] are decoded by print_cpu_cache():
 * slot 0 is the instruction cache when the level has one, and the
 * data/unified cache follows in the next free slot.
 *
 * The text is accumulated in 'sb', which is then finished and written out
 * with printf().
 */
static void
print_cpu_caches(struct sbuf *sb, u_int cpu)
{
	uint64_t clidr;
	int i;

	clidr = cpu_desc[cpu].clidr;

	/*
	 * Bound the walk by MAX_CACHES as well as by the type field: without
	 * it, a CLIDR value whose 3-bit fields are all non-zero would index
	 * past the end of ccsidr[].
	 * NOTE(review): the Arm ARM describes seven Ctype fields in CLIDR_EL1,
	 * one fewer than MAX_CACHES -- confirm whether a tighter bound is
	 * wanted here.
	 */
	for (i = 0; i < MAX_CACHES && (clidr & CLIDR_CTYPE_MASK) != 0;
	    i++, clidr >>= 3) {
		int ctype = (clidr & CLIDR_CTYPE_MASK);
		bool has_icache = (ctype & CLIDR_CTYPE_IO) != 0;
		bool has_dcache = (ctype & ~CLIDR_CTYPE_IO) != 0;
		int j = 0;

		sbuf_printf(sb, " L%d cache: ", i + 1);
		if (has_icache) {
			print_cpu_cache(cpu, sb, cpu_desc[cpu].ccsidr[i][j++],
			    true, false);
			/* If there's more, add to the line. */
			if (has_dcache)
				sbuf_printf(sb, ", ");
		}
		if (has_dcache) {
			print_cpu_cache(cpu, sb, cpu_desc[cpu].ccsidr[i][j],
			    false, (ctype & CLIDR_CTYPE_UNIFIED) != 0);
		}
		sbuf_printf(sb, "\n");
	}
	sbuf_finish(sb);
	printf("%s", sbuf_data(sb));
}
static void
print_cpu_features(u_int cpu)
{
@ -2107,6 +2169,8 @@ print_cpu_features(u_int cpu)
print_id_register(sb, "AArch32 Media and VFP Features 1",
cpu_desc[cpu].mvfr1, mvfr1_fields);
#endif
if (bootverbose)
print_cpu_caches(sb, cpu);
sbuf_delete(sb);
sb = NULL;
@ -2156,6 +2220,8 @@ identify_cache(uint64_t ctr)
void
identify_cpu(u_int cpu)
{
uint64_t clidr;
/* Save affinity for current CPU */
cpu_desc[cpu].mpidr = get_mpidr();
CPU_AFFINITY(cpu) = cpu_desc[cpu].mpidr & CPU_AFF_MASK;
@ -2170,6 +2236,25 @@ identify_cpu(u_int cpu)
cpu_desc[cpu].id_aa64mmfr2 = READ_SPECIALREG(id_aa64mmfr2_el1);
cpu_desc[cpu].id_aa64pfr0 = READ_SPECIALREG(id_aa64pfr0_el1);
cpu_desc[cpu].id_aa64pfr1 = READ_SPECIALREG(id_aa64pfr1_el1);
cpu_desc[cpu].clidr = READ_SPECIALREG(clidr_el1);
clidr = cpu_desc[cpu].clidr;
for (int i = 0; (clidr & CLIDR_CTYPE_MASK) != 0; i++, clidr >>= 3) {
int j = 0;
if ((clidr & CLIDR_CTYPE_IO)) {
WRITE_SPECIALREG(csselr_el1,
CSSELR_Level(i) | CSSELR_InD);
cpu_desc[cpu].ccsidr[i][j++] =
READ_SPECIALREG(ccsidr_el1);
}
if ((clidr & ~CLIDR_CTYPE_IO) == 0)
continue;
WRITE_SPECIALREG(csselr_el1, CSSELR_Level(i));
cpu_desc[cpu].ccsidr[i][j] = READ_SPECIALREG(ccsidr_el1);
}
#ifdef COMPAT_FREEBSD32
/* Only read aarch32 SRs if EL0-32 is available */
if (ID_AA64PFR0_EL0_VAL(cpu_desc[cpu].id_aa64pfr0) ==

View File

@ -69,6 +69,32 @@
#define UL(x) UINT64_C(x)
/*
 * CCSIDR_EL1 - Cache Size ID Register
 *
 * Two layouts are described.  In the default one Associativity is in bits
 * 12:3 and NumSets in bits 27:13.  In the one named with a "64" suffix
 * Associativity is in bits 23:3 and NumSets in bits 55:32.  LineSize is in
 * bits 2:0 in both.
 */
#define	CCSIDR_LineSize_MASK	0x7

/* Default layout. */
#define	CCSIDR_Assoc_SHIFT	3
#define	CCSIDR_Assoc_MASK	(0x3FF << CCSIDR_Assoc_SHIFT)
#define	CCSIDR_NumSets_SHIFT	13
#define	CCSIDR_NumSets_MASK	(0x7FFF << CCSIDR_NumSets_SHIFT)

/* "64" layout. */
#define	CCSIDR_Assoc64_SHIFT	3
#define	CCSIDR_Assoc64_MASK	(0x1FFFFF << CCSIDR_Assoc64_SHIFT)
#define	CCSIDR_NumSets64_SHIFT	32
#define	CCSIDR_NumSets64_MASK	0x00FFFFFF00000000

/* Extract the raw NumSets / Associativity field from a CCSIDR value. */
#define	CCSIDR_NSETS(reg)	\
	(((reg) & CCSIDR_NumSets_MASK) >> CCSIDR_NumSets_SHIFT)
#define	CCSIDR_ASSOC(reg)	\
	(((reg) & CCSIDR_Assoc_MASK) >> CCSIDR_Assoc_SHIFT)
#define	CCSIDR_NSETS_64(reg)	\
	(((reg) & CCSIDR_NumSets64_MASK) >> CCSIDR_NumSets64_SHIFT)
#define	CCSIDR_ASSOC_64(reg)	\
	(((reg) & CCSIDR_Assoc64_MASK) >> CCSIDR_Assoc64_SHIFT)
/*
 * CLIDR_EL1 - Cache level ID register
 *
 * Values of one 3-bit cache type field.
 */
#define	CLIDR_CTYPE_IO		0x1	/* Instruction only */
#define	CLIDR_CTYPE_DO		0x2	/* Data only */
#define	CLIDR_CTYPE_UNIFIED	0x4	/* Unified */
/* Split instruction and data */
#define	CLIDR_CTYPE_ID		(CLIDR_CTYPE_IO | CLIDR_CTYPE_DO)
/* Cache type mask bits */
#define	CLIDR_CTYPE_MASK	\
	(CLIDR_CTYPE_IO | CLIDR_CTYPE_DO | CLIDR_CTYPE_UNIFIED)
/* CNTHCTL_EL2 - Counter-timer Hypervisor Control register */
#define CNTHCTL_EVNTI_MASK (0xf << 4) /* Bit to trigger event stream */
#define CNTHCTL_EVNTDIR (1 << 3) /* Control transition trigger bit */
@ -119,6 +145,10 @@
#define CPACR_FPEN_TRAP_NONE (0x3 << 20) /* No traps */
#define CPACR_TTA (0x1 << 28)
/* CSSELR_EL1 - Cache size selection register */
/*
 * Place a cache level number in the Level field, which starts at bit 1.
 * The argument is parenthesised so that expressions with operators of lower
 * precedence than << (e.g. "a | b" or "c ? x : y") are shifted as a whole.
 */
#define	CSSELR_Level(i)		((i) << 1)
/* Bit 0; ORed in when selecting an instruction cache. */
#define	CSSELR_InD		0x00000001
/* CTR_EL0 - Cache Type Register */
#define CTR_RES1 (1 << 31)
#define CTR_TminLine_SHIFT 32