From 9eb07d5627afe7b23da841465afacf9450dce95c Mon Sep 17 00:00:00 2001
From: Andrew Turner
Date: Wed, 1 Jul 2020 15:17:45 +0000
Subject: [PATCH] Read the arm64 ID registers earlier in the boot process.

Also move parsing the registers to just after the secondary CPUs have
started. This means the kernel register view from all CPUs is available
after the CPU SYSINITs have finished, e.g. for use by ifunc resolvers.

Sponsored by:	Innovate UK
Differential Revision:	https://reviews.freebsd.org/D25505
---
 sys/arm64/arm64/identcpu.c   | 150 ++++++++++++++++++-----------------
 sys/arm64/arm64/mp_machdep.c |  19 +++--
 2 files changed, 90 insertions(+), 79 deletions(-)

diff --git a/sys/arm64/arm64/identcpu.c b/sys/arm64/arm64/identcpu.c
index 2f428b56a64e..6220e7f365ae 100644
--- a/sys/arm64/arm64/identcpu.c
+++ b/sys/arm64/arm64/identcpu.c
@@ -46,7 +46,6 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 
-static int ident_lock;
 static void print_cpu_features(u_int cpu);
 static u_long parse_cpu_features_hwcap(u_int cpu);
 
@@ -67,6 +66,8 @@ static int allow_idc = 1;
 SYSCTL_INT(_machdep_cache, OID_AUTO, allow_idc, CTLFLAG_RDTUN, &allow_idc, 0,
     "Allow optimizations based on the IDC cache bit");
 
+static void check_cpu_regs(u_int cpu);
+
 /*
  * The default implementation of I-cache sync assumes we have an
  * aliasing cache until we know otherwise.
@@ -1063,8 +1064,9 @@ identify_cpu_sysinit(void *dummy __unused)
 	dic = (allow_dic != 0);
 	idc = (allow_idc != 0);
 
+
 	CPU_FOREACH(cpu) {
-		print_cpu_features(cpu);
+		check_cpu_regs(cpu);
 		hwcap = parse_cpu_features_hwcap(cpu);
 		if (elf_hwcap == 0)
 			elf_hwcap = hwcap;
@@ -1096,7 +1098,17 @@ identify_cpu_sysinit(void *dummy __unused)
 
 	install_undef_handler(true, user_mrs_handler);
 }
-SYSINIT(identify_cpu, SI_SUB_SMP, SI_ORDER_ANY, identify_cpu_sysinit, NULL);
+SYSINIT(identify_cpu, SI_SUB_CPU, SI_ORDER_ANY, identify_cpu_sysinit, NULL);
+
+static void
+cpu_features_sysinit(void *dummy __unused)
+{
+	u_int cpu;
+
+	CPU_FOREACH(cpu)
+		print_cpu_features(cpu);
+}
+SYSINIT(cpu_features, SI_SUB_SMP, SI_ORDER_ANY, cpu_features_sysinit, NULL);
 
 static u_long
 parse_cpu_features_hwcap(u_int cpu)
@@ -1468,7 +1480,8 @@ identify_cpu(void)
 		if (impl_id == cpu_implementers[i].impl_id ||
 		    cpu_implementers[i].impl_id == 0) {
 			cpu_desc[cpu].cpu_impl = impl_id;
-			cpu_desc[cpu].cpu_impl_name = cpu_implementers[i].impl_name;
+			cpu_desc[cpu].cpu_impl_name =
+			    cpu_implementers[i].impl_name;
 			cpu_partsp = cpu_implementers[i].cpu_parts;
 			break;
 		}
@@ -1505,77 +1518,68 @@ identify_cpu(void)
 	cpu_desc[cpu].id_aa64mmfr2 = READ_SPECIALREG(id_aa64mmfr2_el1);
 	cpu_desc[cpu].id_aa64pfr0 = READ_SPECIALREG(id_aa64pfr0_el1);
 	cpu_desc[cpu].id_aa64pfr1 = READ_SPECIALREG(id_aa64pfr1_el1);
+}
 
-	if (cpu != 0) {
+static void
+check_cpu_regs(u_int cpu)
+{
+
+	switch (cpu_aff_levels) {
+	case 0:
+		if (CPU_AFF0(cpu_desc[cpu].mpidr) !=
+		    CPU_AFF0(cpu_desc[0].mpidr))
+			cpu_aff_levels = 1;
+		/* FALLTHROUGH */
+	case 1:
+		if (CPU_AFF1(cpu_desc[cpu].mpidr) !=
+		    CPU_AFF1(cpu_desc[0].mpidr))
+			cpu_aff_levels = 2;
+		/* FALLTHROUGH */
+	case 2:
+		if (CPU_AFF2(cpu_desc[cpu].mpidr) !=
+		    CPU_AFF2(cpu_desc[0].mpidr))
+			cpu_aff_levels = 3;
+		/* FALLTHROUGH */
+	case 3:
+		if (CPU_AFF3(cpu_desc[cpu].mpidr) !=
+		    CPU_AFF3(cpu_desc[0].mpidr))
+			cpu_aff_levels = 4;
+		break;
+	}
+
+	if (cpu_desc[cpu].id_aa64afr0 != cpu_desc[0].id_aa64afr0)
+		cpu_print_regs |= PRINT_ID_AA64_AFR0;
+	if (cpu_desc[cpu].id_aa64afr1 != cpu_desc[0].id_aa64afr1)
+		cpu_print_regs |= PRINT_ID_AA64_AFR1;
+
+	if (cpu_desc[cpu].id_aa64dfr0 != cpu_desc[0].id_aa64dfr0)
+		cpu_print_regs |= PRINT_ID_AA64_DFR0;
+	if (cpu_desc[cpu].id_aa64dfr1 != cpu_desc[0].id_aa64dfr1)
+		cpu_print_regs |= PRINT_ID_AA64_DFR1;
+
+	if (cpu_desc[cpu].id_aa64isar0 != cpu_desc[0].id_aa64isar0)
+		cpu_print_regs |= PRINT_ID_AA64_ISAR0;
+	if (cpu_desc[cpu].id_aa64isar1 != cpu_desc[0].id_aa64isar1)
+		cpu_print_regs |= PRINT_ID_AA64_ISAR1;
+
+	if (cpu_desc[cpu].id_aa64mmfr0 != cpu_desc[0].id_aa64mmfr0)
+		cpu_print_regs |= PRINT_ID_AA64_MMFR0;
+	if (cpu_desc[cpu].id_aa64mmfr1 != cpu_desc[0].id_aa64mmfr1)
+		cpu_print_regs |= PRINT_ID_AA64_MMFR1;
+	if (cpu_desc[cpu].id_aa64mmfr2 != cpu_desc[0].id_aa64mmfr2)
+		cpu_print_regs |= PRINT_ID_AA64_MMFR2;
+
+	if (cpu_desc[cpu].id_aa64pfr0 != cpu_desc[0].id_aa64pfr0)
+		cpu_print_regs |= PRINT_ID_AA64_PFR0;
+	if (cpu_desc[cpu].id_aa64pfr1 != cpu_desc[0].id_aa64pfr1)
+		cpu_print_regs |= PRINT_ID_AA64_PFR1;
+
+	if (cpu_desc[cpu].ctr != cpu_desc[0].ctr) {
 		/*
-		 * This code must run on one cpu at a time, but we are
-		 * not scheduling on the current core so implement a
-		 * simple spinlock.
+		 * If the cache type register is different we may
+		 * have a different l1 cache type.
 		 */
-		while (atomic_cmpset_acq_int(&ident_lock, 0, 1) == 0)
-			__asm __volatile("wfe" ::: "memory");
-
-		switch (cpu_aff_levels) {
-		case 0:
-			if (CPU_AFF0(cpu_desc[cpu].mpidr) !=
-			    CPU_AFF0(cpu_desc[0].mpidr))
-				cpu_aff_levels = 1;
-			/* FALLTHROUGH */
-		case 1:
-			if (CPU_AFF1(cpu_desc[cpu].mpidr) !=
-			    CPU_AFF1(cpu_desc[0].mpidr))
-				cpu_aff_levels = 2;
-			/* FALLTHROUGH */
-		case 2:
-			if (CPU_AFF2(cpu_desc[cpu].mpidr) !=
-			    CPU_AFF2(cpu_desc[0].mpidr))
-				cpu_aff_levels = 3;
-			/* FALLTHROUGH */
-		case 3:
-			if (CPU_AFF3(cpu_desc[cpu].mpidr) !=
-			    CPU_AFF3(cpu_desc[0].mpidr))
-				cpu_aff_levels = 4;
-			break;
-		}
-
-		if (cpu_desc[cpu].id_aa64afr0 != cpu_desc[0].id_aa64afr0)
-			cpu_print_regs |= PRINT_ID_AA64_AFR0;
-		if (cpu_desc[cpu].id_aa64afr1 != cpu_desc[0].id_aa64afr1)
-			cpu_print_regs |= PRINT_ID_AA64_AFR1;
-
-		if (cpu_desc[cpu].id_aa64dfr0 != cpu_desc[0].id_aa64dfr0)
-			cpu_print_regs |= PRINT_ID_AA64_DFR0;
-		if (cpu_desc[cpu].id_aa64dfr1 != cpu_desc[0].id_aa64dfr1)
-			cpu_print_regs |= PRINT_ID_AA64_DFR1;
-
-		if (cpu_desc[cpu].id_aa64isar0 != cpu_desc[0].id_aa64isar0)
-			cpu_print_regs |= PRINT_ID_AA64_ISAR0;
-		if (cpu_desc[cpu].id_aa64isar1 != cpu_desc[0].id_aa64isar1)
-			cpu_print_regs |= PRINT_ID_AA64_ISAR1;
-
-		if (cpu_desc[cpu].id_aa64mmfr0 != cpu_desc[0].id_aa64mmfr0)
-			cpu_print_regs |= PRINT_ID_AA64_MMFR0;
-		if (cpu_desc[cpu].id_aa64mmfr1 != cpu_desc[0].id_aa64mmfr1)
-			cpu_print_regs |= PRINT_ID_AA64_MMFR1;
-		if (cpu_desc[cpu].id_aa64mmfr2 != cpu_desc[0].id_aa64mmfr2)
-			cpu_print_regs |= PRINT_ID_AA64_MMFR2;
-
-		if (cpu_desc[cpu].id_aa64pfr0 != cpu_desc[0].id_aa64pfr0)
-			cpu_print_regs |= PRINT_ID_AA64_PFR0;
-		if (cpu_desc[cpu].id_aa64pfr1 != cpu_desc[0].id_aa64pfr1)
-			cpu_print_regs |= PRINT_ID_AA64_PFR1;
-
-		if (cpu_desc[cpu].ctr != cpu_desc[0].ctr) {
-			/*
-			 * If the cache type register is different we may
-			 * have a different l1 cache type.
-			 */
-			identify_cache(cpu_desc[cpu].ctr);
-			cpu_print_regs |= PRINT_CTR_EL0;
-		}
-
-		/* Wake up the other CPUs */
-		atomic_store_rel_int(&ident_lock, 0);
-		__asm __volatile("sev" ::: "memory");
+		identify_cache(cpu_desc[cpu].ctr);
+		cpu_print_regs |= PRINT_CTR_EL0;
 	}
 }
diff --git a/sys/arm64/arm64/mp_machdep.c b/sys/arm64/arm64/mp_machdep.c
index e3b8e350dce4..b90873c65054 100644
--- a/sys/arm64/arm64/mp_machdep.c
+++ b/sys/arm64/arm64/mp_machdep.c
@@ -212,6 +212,19 @@ init_secondary(uint64_t cpu)
 	    "mov x18, %0 \n"
 	    "msr tpidr_el1, %0" :: "r"(pcpup));
 
+	/*
+	 * Identify current CPU. This is necessary to setup
+	 * affinity registers and to provide support for
+	 * runtime chip identification.
+	 *
+	 * We need this before signalling the CPU is ready to
+	 * let the boot CPU use the results.
+	 */
+	identify_cpu();
+
+	/* Ensure the stores in identify_cpu have completed */
+	atomic_thread_fence_acq_rel();
+
 	/* Signal the BSP and spin until it has released all APs. */
 	atomic_add_int(&aps_started, 1);
 	while (!atomic_load_int(&aps_ready))
@@ -227,12 +240,6 @@ init_secondary(uint64_t cpu)
 	    ("pmap0 doesn't match cpu %ld's ttbr0", cpu));
 	pcpup->pc_curpmap = pmap0;
 
-	/*
-	 * Identify current CPU. This is necessary to setup
-	 * affinity registers and to provide support for
-	 * runtime chip identification.
-	 */
-	identify_cpu();
 	install_cpu_errata();
 
 	intr_pic_init_secondary();
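
A note on the synchronization this patch relies on: each AP now runs
identify_cpu() (plain stores into its own cpu_desc[] slot), issues
atomic_thread_fence_acq_rel(), and only then increments aps_started, so
once the boot CPU has observed an AP's increment, that AP's ID-register
stores are guaranteed visible and the old ident_lock spinlock is no
longer needed. Below is a minimal userspace sketch of this
publish-then-signal handshake in C11 atomics and pthreads; it is not
kernel code, and the names (fake_cpu_desc, ap_main, NCPUS) are invented
for illustration. The sketch folds the kernel's fence-plus-add pair into
a single release increment, which pairs with the consumer's acquire load
to give the same ordering.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define	NCPUS	2

struct fake_cpu_desc {			/* hypothetical stand-in for cpu_desc */
	unsigned long id_aa64pfr0;
	unsigned long mpidr;
};

static struct fake_cpu_desc cpu_desc[NCPUS];	/* plain, non-atomic data */
static atomic_int aps_started;			/* mirrors aps_started */

static void *
ap_main(void *arg)
{
	int cpu = (int)(long)arg;

	/* "identify_cpu()": plain stores into this CPU's own slot. */
	cpu_desc[cpu].id_aa64pfr0 = 0x1122UL + cpu;
	cpu_desc[cpu].mpidr = (unsigned long)cpu;

	/*
	 * Publish: a release increment orders the stores above before
	 * the counter update, standing in for the patch's
	 * atomic_thread_fence_acq_rel() + atomic_add_int() pair.
	 */
	atomic_fetch_add_explicit(&aps_started, 1, memory_order_release);
	return (NULL);
}

int
main(void)
{
	pthread_t ap;

	pthread_create(&ap, NULL, ap_main, (void *)1L);

	/* Consume: spin with acquire until the AP has published. */
	while (atomic_load_explicit(&aps_started, memory_order_acquire) < 1)
		;

	/* The AP's descriptor is now safely readable. */
	printf("cpu1: id_aa64pfr0=%#lx mpidr=%#lx\n",
	    cpu_desc[1].id_aa64pfr0, cpu_desc[1].mpidr);
	pthread_join(ap, NULL);
	return (0);
}

The design point worth noting: the release/acquire pair only orders the
handshake itself; the cpu_desc[] slots can stay plain (non-atomic)
because each slot is written by exactly one CPU before the signal and
read only after it, which is also why check_cpu_regs() can later compare
all slots from a single thread without any lock.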