- Introduce a cpu_ipi_single() function pointer in order to send IPIs

to single CPUs more efficiently with Cheetah(-class) and Jalapeno CPUs.
  Besides being used to implement the ipi_cpu() introduced in r210939,
  cpu_ipi_single() will also be used internally by the sparc64 MD code.
- Factor out the Jalapeno support from the Cheetah IPI send functions
  in order to be able to more easily and efficiently implement support
  for more than 32 target CPUs as well as a workaround for Cheetah+
  erratum 25 for the latter.
This commit is contained in:
Marius Strobl 2010-08-08 00:09:22 +00:00
parent 820a9ea5cb
commit dfab2088e8
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=211050
2 changed files with 176 additions and 25 deletions

View File

@ -98,6 +98,8 @@ void cpu_mp_shutdown(void);
typedef void cpu_ipi_selected_t(u_int, u_long, u_long, u_long);
extern cpu_ipi_selected_t *cpu_ipi_selected;
typedef void cpu_ipi_single_t(u_int, u_long, u_long, u_long);
extern cpu_ipi_single_t *cpu_ipi_single;
void mp_init(u_int cpu_impl);
@ -139,11 +141,7 @@ static __inline void
ipi_cpu(int cpu, u_int ipi)
{
/*
* XXX: Not ideal, but would require more work to add a cpu_ipi_cpu
* function pointer.
*/
cpu_ipi_selected(1 << cpu, 0, (u_long)tl_ipi_level, ipi);
cpu_ipi_single(cpu, 0, (u_long)tl_ipi_level, ipi);
}
#if defined(_MACHINE_PMAP_H_) && defined(_SYS_MUTEX_H_)
@ -243,7 +241,8 @@ ipi_tlb_range_demap(struct pmap *pm, vm_offset_t start, vm_offset_t end)
ita->ita_pmap = pm;
ita->ita_start = start;
ita->ita_end = end;
cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_tlb_range_demap, (u_long)ita);
cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_tlb_range_demap,
(u_long)ita);
return (&ita->ita_mask);
}

View File

@ -29,7 +29,7 @@
*/
/*-
* Copyright (c) 2002 Jake Burkholder.
* Copyright (c) 2007 Marius Strobl <marius@FreeBSD.org>
* Copyright (c) 2007 - 2010 Marius Strobl <marius@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -115,6 +115,7 @@ struct pcb stoppcbs[MAXCPU];
struct mtx ipi_mtx;
cpu_ipi_selected_t *cpu_ipi_selected;
cpu_ipi_single_t *cpu_ipi_single;
static vm_offset_t mp_tramp;
static u_int cpuid_to_mid[MAXCPU];
@ -126,11 +127,14 @@ static void ap_start(phandle_t node, u_int mid, u_int cpu_impl);
static void cpu_mp_unleash(void *v);
static void foreach_ap(phandle_t node, void (*func)(phandle_t node,
u_int mid, u_int cpu_impl));
static void spitfire_ipi_send(u_int mid, u_long d0, u_long d1, u_long d2);
static void sun4u_startcpu(phandle_t cpu, void *func, u_long arg);
static cpu_ipi_selected_t cheetah_ipi_selected;
static cpu_ipi_single_t cheetah_ipi_single;
static cpu_ipi_selected_t jalapeno_ipi_selected;
static cpu_ipi_single_t jalapeno_ipi_single;
static cpu_ipi_selected_t spitfire_ipi_selected;
static cpu_ipi_single_t spitfire_ipi_single;
SYSINIT(cpu_mp_unleash, SI_SUB_SMP, SI_ORDER_FIRST, cpu_mp_unleash, NULL);
@ -164,13 +168,18 @@ mp_init(u_int cpu_impl)
* cpu_mp_start() wasn't so initialize these here.
*/
if (cpu_impl == CPU_IMPL_ULTRASPARCIIIi ||
cpu_impl == CPU_IMPL_ULTRASPARCIIIip)
cpu_impl == CPU_IMPL_ULTRASPARCIIIip) {
isjbus = 1;
if (cpu_impl == CPU_IMPL_SPARC64V ||
cpu_impl >= CPU_IMPL_ULTRASPARCIII)
cpu_ipi_selected = jalapeno_ipi_selected;
cpu_ipi_single = jalapeno_ipi_single;
} else if (cpu_impl == CPU_IMPL_SPARC64V ||
cpu_impl >= CPU_IMPL_ULTRASPARCIII) {
cpu_ipi_selected = cheetah_ipi_selected;
else
cpu_ipi_single = cheetah_ipi_single;
} else {
cpu_ipi_selected = spitfire_ipi_selected;
cpu_ipi_single = spitfire_ipi_single;
}
}
static void
@ -181,7 +190,7 @@ foreach_ap(phandle_t node, void (*func)(phandle_t node, u_int mid,
phandle_t child;
u_int cpuid;
uint32_t cpu_impl;
/* There's no need to traverse the whole OFW tree twice. */
if (mp_maxid > 0 && mp_ncpus >= mp_maxid + 1)
return;
@ -201,7 +210,7 @@ foreach_ap(phandle_t node, void (*func)(phandle_t node, u_int mid,
panic("%s: couldn't determine CPU "
"implementation", __func__);
if (OF_getprop(node, cpu_cpuid_prop(cpu_impl), &cpuid,
sizeof(cpuid)) <= 0)
sizeof(cpuid)) <= 0)
panic("%s: couldn't determine CPU module ID",
__func__);
if (cpuid == PCPU_GET(mid))
@ -530,24 +539,25 @@ spitfire_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
{
u_int cpu;
KASSERT((cpus & (1 << curcpu)) == 0,
("%s: CPU can't IPI itself", __func__));
while (cpus) {
cpu = ffs(cpus) - 1;
cpus &= ~(1 << cpu);
spitfire_ipi_send(cpuid_to_mid[cpu], d0, d1, d2);
spitfire_ipi_single(cpu, d0, d1, d2);
}
}
static void
spitfire_ipi_send(u_int mid, u_long d0, u_long d1, u_long d2)
spitfire_ipi_single(u_int cpu, u_long d0, u_long d1, u_long d2)
{
register_t s;
u_long ids;
u_int mid;
int i;
KASSERT(cpu != curcpu, ("%s: CPU can't IPI itself", __func__));
KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) & IDR_BUSY) == 0,
("%s: outstanding dispatch", __func__));
mid = cpuid_to_mid[cpu];
for (i = 0; i < IPI_RETRIES; i++) {
s = intr_disable();
stxa(AA_SDB_INTR_D0, ASI_SDB_INTR_W, d0);
@ -587,6 +597,49 @@ spitfire_ipi_send(u_int mid, u_long d0, u_long d1, u_long d2)
__func__, mid);
}
static void
cheetah_ipi_single(u_int cpu, u_long d0, u_long d1, u_long d2)
{
register_t s;
u_long ids;
u_int mid;
int i;
KASSERT(cpu != curcpu, ("%s: CPU can't IPI itself", __func__));
KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) &
IDR_CHEETAH_ALL_BUSY) == 0,
("%s: outstanding dispatch", __func__));
mid = cpuid_to_mid[cpu];
for (i = 0; i < IPI_RETRIES; i++) {
s = intr_disable();
stxa(AA_SDB_INTR_D0, ASI_SDB_INTR_W, d0);
stxa(AA_SDB_INTR_D1, ASI_SDB_INTR_W, d1);
stxa(AA_SDB_INTR_D2, ASI_SDB_INTR_W, d2);
membar(Sync);
stxa(AA_INTR_SEND | (mid << IDC_ITID_SHIFT),
ASI_SDB_INTR_W, 0);
membar(Sync);
while (((ids = ldxa(0, ASI_INTR_DISPATCH_STATUS)) &
IDR_BUSY) != 0)
;
intr_restore(s);
if ((ids & (IDR_BUSY | IDR_NACK)) == 0)
return;
/*
* Leave interrupts enabled for a bit before retrying
* in order to avoid deadlocks if the other CPU is also
* trying to send an IPI.
*/
DELAY(2);
}
if (kdb_active != 0 || panicstr != NULL)
printf("%s: couldn't send IPI to module 0x%u\n",
__func__, mid);
else
panic("%s: couldn't send IPI to module 0x%u",
__func__, mid);
}
static void
cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
{
@ -613,9 +666,8 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
bnp = 0;
for (cpu = 0; cpu < mp_ncpus; cpu++) {
if ((cpus & (1 << cpu)) != 0) {
stxa(AA_INTR_SEND |
(cpuid_to_mid[cpu] << IDC_ITID_SHIFT) |
(isjbus ? 0 : bnp << IDC_BN_SHIFT),
stxa(AA_INTR_SEND | (cpuid_to_mid[cpu] <<
IDC_ITID_SHIFT) | bnp << IDC_BN_SHIFT,
ASI_SDB_INTR_W, 0);
membar(Sync);
bnp++;
@ -625,14 +677,13 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
IDR_CHEETAH_ALL_BUSY) != 0)
;
intr_restore(s);
if ((ids & (IDR_CHEETAH_ALL_BUSY | IDR_CHEETAH_ALL_NACK)) == 0)
if ((ids &
(IDR_CHEETAH_ALL_BUSY | IDR_CHEETAH_ALL_NACK)) == 0)
return;
bnp = 0;
for (cpu = 0; cpu < mp_ncpus; cpu++) {
if ((cpus & (1 << cpu)) != 0) {
if ((ids & (IDR_NACK << (isjbus ?
(2 * cpuid_to_mid[cpu]) :
(2 * bnp)))) == 0)
if ((ids & (IDR_NACK << (2 * bnp))) == 0)
cpus &= ~(1 << cpu);
bnp++;
}
@ -658,3 +709,104 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
panic("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)",
__func__, cpus, ids);
}
static void
jalapeno_ipi_single(u_int cpu, u_long d0, u_long d1, u_long d2)
{
register_t s;
u_long ids;
u_int busy, busynack, mid;
int i;
KASSERT(cpu != curcpu, ("%s: CPU can't IPI itself", __func__));
KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) &
IDR_CHEETAH_ALL_BUSY) == 0,
("%s: outstanding dispatch", __func__));
mid = cpuid_to_mid[cpu];
busy = IDR_BUSY << (2 * mid);
busynack = (IDR_BUSY | IDR_NACK) << (2 * mid);
for (i = 0; i < IPI_RETRIES; i++) {
s = intr_disable();
stxa(AA_SDB_INTR_D0, ASI_SDB_INTR_W, d0);
stxa(AA_SDB_INTR_D1, ASI_SDB_INTR_W, d1);
stxa(AA_SDB_INTR_D2, ASI_SDB_INTR_W, d2);
membar(Sync);
stxa(AA_INTR_SEND | (mid << IDC_ITID_SHIFT),
ASI_SDB_INTR_W, 0);
membar(Sync);
while (((ids = ldxa(0, ASI_INTR_DISPATCH_STATUS)) &
busy) != 0)
;
intr_restore(s);
if ((ids & busynack) == 0)
return;
/*
* Leave interrupts enabled for a bit before retrying
* in order to avoid deadlocks if the other CPU is also
* trying to send an IPI.
*/
DELAY(2);
}
if (kdb_active != 0 || panicstr != NULL)
printf("%s: couldn't send IPI to module 0x%u\n",
__func__, mid);
else
panic("%s: couldn't send IPI to module 0x%u",
__func__, mid);
}
static void
jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
{
register_t s;
u_long ids;
u_int cpu;
int i;
KASSERT((cpus & (1 << curcpu)) == 0,
("%s: CPU can't IPI itself", __func__));
KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) &
IDR_CHEETAH_ALL_BUSY) == 0,
("%s: outstanding dispatch", __func__));
if (cpus == 0)
return;
ids = 0;
for (i = 0; i < IPI_RETRIES * mp_ncpus; i++) {
s = intr_disable();
stxa(AA_SDB_INTR_D0, ASI_SDB_INTR_W, d0);
stxa(AA_SDB_INTR_D1, ASI_SDB_INTR_W, d1);
stxa(AA_SDB_INTR_D2, ASI_SDB_INTR_W, d2);
membar(Sync);
for (cpu = 0; cpu < mp_ncpus; cpu++) {
if ((cpus & (1 << cpu)) != 0) {
stxa(AA_INTR_SEND | (cpuid_to_mid[cpu] <<
IDC_ITID_SHIFT), ASI_SDB_INTR_W, 0);
membar(Sync);
}
}
while (((ids = ldxa(0, ASI_INTR_DISPATCH_STATUS)) &
IDR_CHEETAH_ALL_BUSY) != 0)
;
intr_restore(s);
if ((ids &
(IDR_CHEETAH_ALL_BUSY | IDR_CHEETAH_ALL_NACK)) == 0)
return;
for (cpu = 0; cpu < mp_ncpus; cpu++)
if ((cpus & (1 << cpu)) != 0)
if ((ids & (IDR_NACK <<
(2 * cpuid_to_mid[cpu]))) == 0)
cpus &= ~(1 << cpu);
/*
* Leave interrupts enabled for a bit before retrying
* in order to avoid deadlocks if the other CPUs are
* also trying to send IPIs.
*/
DELAY(2 * mp_ncpus);
}
if (kdb_active != 0 || panicstr != NULL)
printf("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)\n",
__func__, cpus, ids);
else
panic("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)",
__func__, cpus, ids);
}