diff --git a/sys/riscv/include/pcpu.h b/sys/riscv/include/pcpu.h
index 7447d14fa777..0f04db0a5efb 100644
--- a/sys/riscv/include/pcpu.h
+++ b/sys/riscv/include/pcpu.h
@@ -47,7 +47,8 @@
 
 #define	PCPU_MD_FIELDS						\
 	struct pmap *pc_curpmap;	/* Currently active pmap */	\
 	uint32_t pc_pending_ipis;	/* IPIs pending to this CPU */	\
-	char __pad[61]
+	uint32_t pc_hart;		/* Hart ID */			\
+	char __pad[57]
 
 #ifdef	_KERNEL
diff --git a/sys/riscv/riscv/intr_machdep.c b/sys/riscv/riscv/intr_machdep.c
index 030f073098fc..7251cb250f0a 100644
--- a/sys/riscv/riscv/intr_machdep.c
+++ b/sys/riscv/riscv/intr_machdep.c
@@ -207,7 +207,7 @@ ipi_send(struct pcpu *pc, int ipi)
 	CTR3(KTR_SMP, "%s: cpu=%d, ipi=%x", __func__, pc->pc_cpuid, ipi);
 
 	atomic_set_32(&pc->pc_pending_ipis, ipi);
-	mask = (1 << (pc->pc_cpuid));
+	mask = (1 << pc->pc_hart);
 
 	sbi_send_ipi(&mask);
 
@@ -252,7 +252,7 @@ ipi_selected(cpuset_t cpus, u_int ipi)
 			CTR3(KTR_SMP, "%s: pc: %p, ipi: %x\n", __func__, pc,
 			    ipi);
 			atomic_set_32(&pc->pc_pending_ipis, ipi);
-			mask |= (1 << (pc->pc_cpuid));
+			mask |= (1 << pc->pc_hart);
 		}
 	}
 	sbi_send_ipi(&mask);
diff --git a/sys/riscv/riscv/locore.S b/sys/riscv/riscv/locore.S
index 30b81ea7493c..c79b4f4a5c1f 100644
--- a/sys/riscv/riscv/locore.S
+++ b/sys/riscv/riscv/locore.S
@@ -59,11 +59,17 @@ _start:
 	sub	t1, t1, t0
 	li	t2, KERNBASE
 	sub	s9, t2, t1	/* s9 = physmem base */
-	mv	s10, a0		/* s10 = hart id */
-	mv	s11, a1		/* s11 = dtbp */
 
-	/* Direct secondary cores to mpentry */
-	bnez	s10, mpentry
+	/*
+	 * a0 = hart id
+	 * a1 = dtbp
+	 */
+
+	/* Pick a hart to run the boot process. */
+	la	t0, hart_lottery
+	li	t1, 1
+	amoadd.w t0, t1, 0(t0)
+	bnez	t0, mpentry
 
 	/*
 	 * Page tables
@@ -123,7 +129,7 @@ _start:
 
 	/* Create an L2 page superpage for DTB */
 	la	s1, pagetable_l2_devmap
-	mv	s2, s11
+	mv	s2, a1
 	srli	s2, s2, PAGE_SHIFT
 
 	li	t0, (PTE_KERN)
@@ -171,12 +177,18 @@ va:
 	addi	sp, sp, -PCB_SIZE
 
 	/* Clear BSS */
-	la	a0, _C_LABEL(__bss_start)
+	la	s0, _C_LABEL(__bss_start)
 	la	s1, _C_LABEL(_end)
 1:
-	sd	zero, 0(a0)
-	addi	a0, a0, 8
-	bltu	a0, s1, 1b
+	sd	zero, 0(s0)
+	addi	s0, s0, 8
+	bltu	s0, s1, 1b
+
+#ifdef SMP
+	/* Store boot hart id. */
+	la	t0, boot_hart
+	sw	a0, 0(t0)
+#endif
 
 	/* Fill riscv_bootparams */
 	addi	sp, sp, -40
@@ -190,7 +202,7 @@ va:
 
 	li	t0, (VM_MAX_KERNEL_ADDRESS - 2 * L2_SIZE)
 	sd	t0, 24(sp)	/* dtbp_virt */
-	sd	s11, 32(sp)	/* dtbp_phys */
+	sd	a1, 32(sp)	/* dtbp_phys */
 
 	mv	a0, sp
 	call	_C_LABEL(initriscv)	/* Off we go */
@@ -233,9 +245,11 @@ pagetable_l2:
 pagetable_l2_devmap:
 	.space	PAGE_SIZE
 
-	.align 3
+	.align 3
 virt_map:
-	.quad virt_map
+	.quad virt_map
+hart_lottery:
+	.space 4
 
 	/* Not in use, but required for linking. */
 	.align 3
@@ -278,7 +292,8 @@ ENTRY(mpentry)
 	/* Setup stack pointer */
 	la	t0, secondary_stacks
 	li	t1, (PAGE_SIZE * KSTACK_PAGES)
-	mulw	t1, t1, s10
+	mulw	t2, t1, a0
+	add	t0, t0, t2
 	add	t0, t0, t1
 	sub	t0, t0, s9
 	li	t1, KERNBASE
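
A note on the hart lottery introduced in locore.S above: every hart may enter
_start concurrently, and amoadd.w atomically fetches and increments the
hart_lottery word, so exactly one hart (the one that reads back 0) continues
as the boot hart while all others branch to mpentry. Below is a minimal C11
sketch of that election, for illustration only; start_hart() and park() are
hypothetical stand-ins for _start and mpentry, not kernel interfaces.

    #include <stdatomic.h>
    #include <stdint.h>

    static atomic_uint hart_lottery;    /* mirrors the .space 4 word above */
    static uint32_t boot_hart;          /* recorded by the winning hart */

    /* Stand-in for mpentry: losing harts wait here until released. */
    static void
    park(uint64_t hartid)
    {
        (void)hartid;
        for (;;)
            ;
    }

    /* What each hart effectively executes at _start. */
    static void
    start_hart(uint64_t hartid)
    {
        /* amoadd.w t0, t1, 0(t0): fetch-and-add, old value returned. */
        if (atomic_fetch_add(&hart_lottery, 1) != 0)
            park(hartid);       /* lost: not the boot hart */
        boot_hart = hartid;     /* won: continue single-threaded boot */
    }

This is also why mpentry now indexes secondary_stacks by the raw hart id in
a0, and why that array grows from MAXCPU - 1 to MAXCPU entries in mp_machdep.c
below: any hart can lose the lottery, and its stack slot is chosen before any
renumbering has happened.
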
diff --git a/sys/riscv/riscv/machdep.c b/sys/riscv/riscv/machdep.c
index 417373744aeb..683d890ab1a5 100644
--- a/sys/riscv/riscv/machdep.c
+++ b/sys/riscv/riscv/machdep.c
@@ -117,6 +117,9 @@ int64_t dcache_line_size;	/* The minimum D cache line size */
 int64_t icache_line_size;	/* The minimum I cache line size */
 int64_t idcache_line_size;	/* The minimum cache line size */
 
+uint32_t boot_hart;	/* The hart we booted on. */
+cpuset_t all_harts;
+
 extern int *end;
 extern int *initstack_end;
 
@@ -815,6 +818,7 @@ initriscv(struct riscv_bootparams *rvbp)
 	/* Set the pcpu data, this is needed by pmap_bootstrap */
 	pcpup = &__pcpu[0];
 	pcpu_init(pcpup, 0, sizeof(struct pcpu));
+	pcpup->pc_hart = boot_hart;
 
 	/* Set the pcpu pointer */
 	__asm __volatile("mv gp, %0" :: "r"(pcpup));
diff --git a/sys/riscv/riscv/mp_machdep.c b/sys/riscv/riscv/mp_machdep.c
index 609d32b30f95..ba0f4be53809 100644
--- a/sys/riscv/riscv/mp_machdep.c
+++ b/sys/riscv/riscv/mp_machdep.c
@@ -91,6 +91,9 @@ static int ipi_handler(void *);
 struct mtx ap_boot_mtx;
 struct pcb stoppcbs[MAXCPU];
 
+extern uint32_t boot_hart;
+extern cpuset_t all_harts;
+
 #ifdef INVARIANTS
 static uint32_t cpu_reg[MAXCPU][2];
 #endif
@@ -99,7 +102,7 @@ static device_t cpu_list[MAXCPU];
 void mpentry(unsigned long cpuid);
 void init_secondary(uint64_t);
 
-uint8_t secondary_stacks[MAXCPU - 1][PAGE_SIZE * KSTACK_PAGES] __aligned(16);
+uint8_t secondary_stacks[MAXCPU][PAGE_SIZE * KSTACK_PAGES] __aligned(16);
 
 /* Set to 1 once we're ready to let the APs out of the pen. */
 volatile int aps_ready = 0;
@@ -182,7 +185,7 @@ riscv64_cpu_attach(device_t dev)
 static void
 release_aps(void *dummy __unused)
 {
-	u_long mask;
+	cpuset_t mask;
 	int cpu, i;
 
 	if (mp_ncpus == 1)
@@ -194,15 +197,13 @@ release_aps(void *dummy __unused)
 	atomic_store_rel_int(&aps_ready, 1);
 
 	/* Wake up the other CPUs */
-	mask = 0;
-
-	for (i = 1; i < mp_ncpus; i++)
-		mask |= (1 << i);
-
-	sbi_send_ipi(&mask);
+	mask = all_harts;
+	CPU_CLR(boot_hart, &mask);
 
 	printf("Release APs\n");
 
+	sbi_send_ipi(mask.__bits);
+
 	for (i = 0; i < 2000; i++) {
 		if (smp_started) {
 			for (cpu = 0; cpu <= mp_maxid; cpu++) {
@@ -219,12 +220,19 @@
 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
 
 void
-init_secondary(uint64_t cpu)
+init_secondary(uint64_t hart)
 {
 	struct pcpu *pcpup;
+	u_int cpuid;
+
+	/* Renumber this cpu */
+	cpuid = hart;
+	if (cpuid < boot_hart)
+		cpuid += mp_maxid + 1;
+	cpuid -= boot_hart;
 
 	/* Setup the pcpu pointer */
-	pcpup = &__pcpu[cpu];
+	pcpup = &__pcpu[cpuid];
 	__asm __volatile("mv gp, %0" :: "r"(pcpup));
 
 	/* Workaround: make sure wfi doesn't halt the hart */
@@ -366,11 +374,12 @@ cpu_mp_probe(void)
 static boolean_t
 cpu_init_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg)
 {
-	uint64_t target_cpu;
 	struct pcpu *pcpup;
+	uint64_t hart;
+	u_int cpuid;
 
-	/* Check we are able to start this cpu */
-	if (id > mp_maxid)
+	/* Check if this hart supports MMU. */
+	if (OF_getproplen(node, "mmu-type") < 0)
 		return (0);
 
 	KASSERT(id < MAXCPU, ("Too many CPUs"));
@@ -382,28 +391,43 @@ cpu_init_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg)
 	cpu_reg[id][1] = reg[1];
 #endif
 
-	target_cpu = reg[0];
+	hart = reg[0];
 	if (addr_size == 2) {
-		target_cpu <<= 32;
-		target_cpu |= reg[1];
+		hart <<= 32;
+		hart |= reg[1];
 	}
 
-	pcpup = &__pcpu[id];
+	KASSERT(hart < MAXCPU, ("Too many harts."));
 
-	/* We are already running on cpu 0 */
-	if (id == 0) {
+	/* We are already running on this cpu */
+	if (hart == boot_hart)
 		return (1);
-	}
 
-	pcpu_init(pcpup, id, sizeof(struct pcpu));
+	/*
+	 * Rotate the CPU IDs to put the boot CPU as CPU 0.
+	 * We keep the other CPUs ordered.
+	 */
+	cpuid = hart;
+	if (cpuid < boot_hart)
+		cpuid += mp_maxid + 1;
+	cpuid -= boot_hart;
 
-	dpcpu[id - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO);
-	dpcpu_init(dpcpu[id - 1], id);
+	/* Check if we are able to start this cpu */
+	if (cpuid > mp_maxid)
+		return (0);
 
-	printf("Starting CPU %u (%lx)\n", id, target_cpu);
-	__riscv_boot_ap[id] = 1;
+	pcpup = &__pcpu[cpuid];
+	pcpu_init(pcpup, cpuid, sizeof(struct pcpu));
+	pcpup->pc_hart = hart;
 
-	CPU_SET(id, &all_cpus);
+	dpcpu[cpuid - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO);
+	dpcpu_init(dpcpu[cpuid - 1], cpuid);
+
+	printf("Starting CPU %u (hart %lx)\n", cpuid, hart);
+	__riscv_boot_ap[hart] = 1;
+
+	CPU_SET(cpuid, &all_cpus);
+	CPU_SET(hart, &all_harts);
 
 	return (1);
 }
@@ -417,6 +441,7 @@ cpu_mp_start(void)
 	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
 
 	CPU_SET(0, &all_cpus);
+	CPU_SET(boot_hart, &all_harts);
 
 	switch(cpu_enum_method) {
 #ifdef FDT
@@ -435,13 +460,24 @@ cpu_mp_announce(void)
 {
 }
 
+static boolean_t
+cpu_check_mmu(u_int id, phandle_t node, u_int addr_size, pcell_t *reg)
+{
+
+	/* Check if this hart supports MMU. */
+	if (OF_getproplen(node, "mmu-type") < 0)
+		return (0);
+
+	return (1);
+}
+
 void
 cpu_mp_setmaxid(void)
 {
 #ifdef FDT
 	int cores;
 
-	cores = ofw_cpu_early_foreach(NULL, false);
+	cores = ofw_cpu_early_foreach(cpu_check_mmu, true);
 	if (cores > 0) {
 		cores = MIN(cores, MAXCPU);
 		if (bootverbose)
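
The renumbering open-coded twice above (in init_secondary() and
cpu_init_fdt()) is a rotation: the boot hart becomes CPU 0 and the remaining
harts keep their relative order, wrapping around past mp_maxid. A
self-contained sketch with worked values; hart_to_cpuid() is a hypothetical
name for what the patch open-codes.

    #include <assert.h>

    /* Rotate hart ids so that the boot hart maps to CPU 0. */
    static unsigned int
    hart_to_cpuid(unsigned int hart, unsigned int boot_hart,
        unsigned int mp_maxid)
    {
        unsigned int cpuid;

        cpuid = hart;
        if (cpuid < boot_hart)
            cpuid += mp_maxid + 1;  /* wrap past the last CPU id */
        return (cpuid - boot_hart);
    }

    int
    main(void)
    {
        /* Four harts, booted on hart 2: 2->0, 3->1, 0->2, 1->3. */
        assert(hart_to_cpuid(2, 2, 3) == 0);
        assert(hart_to_cpuid(3, 2, 3) == 1);
        assert(hart_to_cpuid(0, 2, 3) == 2);
        assert(hart_to_cpuid(1, 2, 3) == 3);
        return (0);
    }
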
diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c
index 96c7752699e5..91928a7f04a9 100644
--- a/sys/riscv/riscv/pmap.c
+++ b/sys/riscv/riscv/pmap.c
@@ -273,6 +273,8 @@ static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
 static struct md_page *pv_table;
 static struct md_page pv_dummy;
 
+extern cpuset_t all_harts;
+
 /*
  * Internal flags for pmap_enter()'s helper functions.
  */
@@ -737,7 +739,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 
 	sched_pin();
 	mask = pmap->pm_active;
-	CPU_CLR(PCPU_GET(cpuid), &mask);
+	CPU_CLR(PCPU_GET(hart), &mask);
 	fence();
 	if (!CPU_EMPTY(&mask) && smp_started)
 		sbi_remote_sfence_vma(mask.__bits, va, 1);
@@ -752,7 +754,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 
 	sched_pin();
 	mask = pmap->pm_active;
-	CPU_CLR(PCPU_GET(cpuid), &mask);
+	CPU_CLR(PCPU_GET(hart), &mask);
 	fence();
 	if (!CPU_EMPTY(&mask) && smp_started)
 		sbi_remote_sfence_vma(mask.__bits, sva, eva - sva + 1);
@@ -772,7 +774,7 @@ pmap_invalidate_all(pmap_t pmap)
 
 	sched_pin();
 	mask = pmap->pm_active;
-	CPU_CLR(PCPU_GET(cpuid), &mask);
+	CPU_CLR(PCPU_GET(hart), &mask);
 
 	/*
 	 * XXX: The SBI doc doesn't detail how to specify x0 as the
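
The pmap.c hunks above and below all make the same substitution: pm_active
and the shootdown masks are now kept in hart numbering rather than cpuid
numbering. The reason is visible in the calls themselves: mask.__bits is
handed straight to sbi_remote_sfence_vma(), and the SBI interprets that
bitmap by hart id, so a cpuid-indexed set would fence the wrong harts
whenever boot_hart != 0. A small userland model of the indexing point;
remote_mask() is a hypothetical one-word reduction of the cpuset logic.

    #include <assert.h>

    /* Model of the shootdown mask: clear ourselves, fence the rest. */
    static unsigned long
    remote_mask(unsigned long pm_active, unsigned int cur_hart)
    {
        return (pm_active & ~(1UL << cur_hart));
    }

    int
    main(void)
    {
        /*
         * Harts 0, 2 and 3 have this pmap active; we run on hart 2,
         * which the rotation shown earlier renumbered to cpuid 0.
         */
        unsigned long pm_active = (1UL << 0) | (1UL << 2) | (1UL << 3);

        /* Clearing by hart id targets remote harts 0 and 3, as intended. */
        assert(remote_mask(pm_active, 2) == ((1UL << 0) | (1UL << 3)));

        /* Clearing by cpuid (0) drops remote hart 0 and keeps our own. */
        assert(remote_mask(pm_active, 0) == ((1UL << 2) | (1UL << 3)));
        return (0);
    }
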
@@ -4255,7 +4257,7 @@ void
 pmap_activate_sw(struct thread *td)
 {
 	pmap_t oldpmap, pmap;
-	u_int cpu;
+	u_int hart;
 
 	oldpmap = PCPU_GET(curpmap);
 	pmap = vmspace_pmap(td->td_proc->p_vmspace);
@@ -4263,13 +4265,13 @@ pmap_activate_sw(struct thread *td)
 		return;
 	load_satp(pmap->pm_satp);
 
-	cpu = PCPU_GET(cpuid);
+	hart = PCPU_GET(hart);
 #ifdef SMP
-	CPU_SET_ATOMIC(cpu, &pmap->pm_active);
-	CPU_CLR_ATOMIC(cpu, &oldpmap->pm_active);
+	CPU_SET_ATOMIC(hart, &pmap->pm_active);
+	CPU_CLR_ATOMIC(hart, &oldpmap->pm_active);
 #else
-	CPU_SET(cpu, &pmap->pm_active);
-	CPU_CLR(cpu, &oldpmap->pm_active);
+	CPU_SET(hart, &pmap->pm_active);
+	CPU_CLR(hart, &oldpmap->pm_active);
 #endif
 
 	PCPU_SET(curpmap, pmap);
@@ -4288,13 +4290,13 @@ pmap_activate(struct thread *td)
 void
 pmap_activate_boot(pmap_t pmap)
 {
-	u_int cpu;
+	u_int hart;
 
-	cpu = PCPU_GET(cpuid);
+	hart = PCPU_GET(hart);
 #ifdef SMP
-	CPU_SET_ATOMIC(cpu, &pmap->pm_active);
+	CPU_SET_ATOMIC(hart, &pmap->pm_active);
 #else
-	CPU_SET(cpu, &pmap->pm_active);
+	CPU_SET(hart, &pmap->pm_active);
 #endif
 	PCPU_SET(curpmap, pmap);
 }
@@ -4313,8 +4315,8 @@ pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz)
 	 * FENCE.I."
 	 */
 	sched_pin();
-	mask = all_cpus;
-	CPU_CLR(PCPU_GET(cpuid), &mask);
+	mask = all_harts;
+	CPU_CLR(PCPU_GET(hart), &mask);
 	fence();
 	if (!CPU_EMPTY(&mask) && smp_started)
 		sbi_remote_fence_i(mask.__bits);
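
A closing note on the first hunk of the patch: PCPU_MD_FIELDS pads the
machine-dependent pcpu area to a fixed size, so adding the 4-byte pc_hart
means shrinking __pad from 61 to 57 bytes. A compile-time sketch of that
arithmetic, assuming the LP64 (riscv64) layout; the struct name is
illustrative and 73 is simply 8 + 4 + 4 + 57.

    #include <stddef.h>
    #include <stdint.h>

    struct pmap;

    /* Sketch of the MD fields from PCPU_MD_FIELDS after this change. */
    struct pcpu_md_sketch {
        struct pmap *pc_curpmap;    /* 8 bytes on riscv64 */
        uint32_t pc_pending_ipis;   /* 4 bytes */
        uint32_t pc_hart;           /* new: 4 bytes */
        char __pad[57];             /* was 61 before pc_hart existed */
    };

    /* 8 + 4 + 4 + 57 must still equal the old 8 + 4 + 61 = 73 bytes. */
    _Static_assert(offsetof(struct pcpu_md_sketch, __pad) + 57 == 8 + 4 + 61,
        "MD pcpu area changed size");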