Implement delivery of tlb shootdown ipis. This is currently more fine-grained
than the other implementations; we have complete control over the tlb, so we
only demap specific pages.  We take advantage of the ranged tlb flush api
to send one ipi for a range of pages, and due to the pm_active optimization
we rarely send ipis for demaps from user pmaps.

Remove now-unused routines to load the tlb; this is only done once, outside
of the tlb fault handlers.
Minor cleanups to the smp startup code.

This boots multi-user with both cpus active on a dual ultra 60 and on a
dual ultra 2.
This commit is contained in:
Jake Burkholder 2002-03-07 06:01:40 +00:00
parent 39028e8396
commit 4f91e3efb2
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=91783
14 changed files with 340 additions and 329 deletions

View File

@ -48,8 +48,10 @@
#define IPI_RETRIES 100
struct cpu_start_args {
u_int csa_count;
u_int csa_mid;
u_int csa_state;
vm_offset_t csa_pcpu;
u_long csa_tick;
u_long csa_ver;
struct tte csa_ttes[PCPU_PAGES];
@ -63,7 +65,7 @@ struct ipi_level_args {
struct ipi_tlb_args {
u_int ita_count;
u_long ita_tlb;
u_long ita_ctx;
struct pmap *ita_pmap;
u_long ita_start;
u_long ita_end;
};
@ -85,8 +87,6 @@ vm_offset_t mp_tramp_alloc(void);
extern struct ipi_level_args ipi_level_args;
extern struct ipi_tlb_args ipi_tlb_args;
extern int mp_ncpus;
extern vm_offset_t mp_tramp;
extern char *mp_tramp_code;
extern u_long mp_tramp_code_len;
@ -103,58 +103,68 @@ extern char tl_ipi_tlb_range_demap[];
#ifdef SMP
#ifdef _MACHINE_PMAP_H_
static __inline void *
ipi_tlb_context_demap(u_int ctx)
ipi_tlb_context_demap(struct pmap *pm)
{
struct ipi_tlb_args *ita;
u_int cpus;
if (mp_ncpus == 1)
if (smp_cpus == 1)
return (NULL);
if ((cpus = (pm->pm_active & PCPU_GET(other_cpus))) == 0)
return (NULL);
ita = &ipi_tlb_args;
ita->ita_count = mp_ncpus;
ita->ita_ctx = ctx;
cpu_ipi_selected(PCPU_GET(other_cpus), 0,
(u_long)tl_ipi_tlb_context_demap, (u_long)ita);
ita->ita_count = smp_cpus;
ita->ita_pmap = pm;
cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_tlb_context_demap,
(u_long)ita);
return (&ita->ita_count);
}
static __inline void *
ipi_tlb_page_demap(u_int tlb, u_int ctx, vm_offset_t va)
ipi_tlb_page_demap(u_int tlb, struct pmap *pm, vm_offset_t va)
{
struct ipi_tlb_args *ita;
u_int cpus;
if (mp_ncpus == 1)
if (smp_cpus == 1)
return (NULL);
if ((cpus = (pm->pm_active & PCPU_GET(other_cpus))) == 0)
return (NULL);
ita = &ipi_tlb_args;
ita->ita_count = mp_ncpus;
ita->ita_count = smp_cpus;
ita->ita_tlb = tlb;
ita->ita_ctx = ctx;
ita->ita_pmap = pm;
ita->ita_va = va;
cpu_ipi_selected(PCPU_GET(other_cpus), 0,
(u_long)tl_ipi_tlb_page_demap, (u_long)ita);
cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_tlb_page_demap, (u_long)ita);
return (&ita->ita_count);
}
static __inline void *
ipi_tlb_range_demap(u_int ctx, vm_offset_t start, vm_offset_t end)
ipi_tlb_range_demap(struct pmap *pm, vm_offset_t start, vm_offset_t end)
{
struct ipi_tlb_args *ita;
u_int cpus;
if (mp_ncpus == 1)
if (smp_cpus == 1)
return (NULL);
if ((cpus = (pm->pm_active & PCPU_GET(other_cpus))) == 0)
return (NULL);
ita = &ipi_tlb_args;
ita->ita_count = mp_ncpus;
ita->ita_ctx = ctx;
ita->ita_count = smp_cpus;
ita->ita_pmap = pm;
ita->ita_start = start;
ita->ita_end = end;
cpu_ipi_selected(PCPU_GET(other_cpus), 0,
(u_long)tl_ipi_tlb_range_demap, (u_long)ita);
cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_tlb_range_demap, (u_long)ita);
return (&ita->ita_count);
}
static __inline void
ipi_wait(void *cookie)
{
#if 0
u_int *count;
if ((count = cookie) != NULL) {
@ -162,24 +172,27 @@ ipi_wait(void *cookie)
while (*count != 0)
;
}
#endif
}
#endif
#else
static __inline void *
ipi_tlb_context_demap(u_int ctx)
ipi_tlb_context_demap(struct pmap *pm)
{
return (NULL);
}
static __inline void *
ipi_tlb_page_demap(u_int tlb, u_int ctx, vm_offset_t va)
ipi_tlb_page_demap(u_int tlb, struct pmap *pm, vm_offset_t va)
{
return (NULL);
}
static __inline void *
ipi_tlb_range_demap(u_int ctx, vm_offset_t start, vm_offset_t end)
ipi_tlb_range_demap(struct pmap *pm, vm_offset_t start, vm_offset_t end)
{
return (NULL);
}

View File

@ -29,10 +29,10 @@
#ifndef _MACHINE_TLB_H_
#define _MACHINE_TLB_H_
#define TLB_SLOT_COUNT 64
#define TLB_SLOT_COUNT 64 /* XXX */
#define TLB_SLOT_TSB_KERNEL_MIN 62 /* XXX */
#define TLB_SLOT_KERNEL 63
#define TLB_SLOT_KERNEL 63 /* XXX */
#define TLB_DAR_SLOT_SHIFT (3)
#define TLB_DAR_SLOT(slot) ((slot) << TLB_DAR_SLOT_SHIFT)
@ -89,170 +89,91 @@ extern int kernel_tlb_slots;
extern struct tte *kernel_ttes;
/*
* Some tlb operations must be atomical, so no interrupt or trap can be allowed
* Some tlb operations must be atomic, so no interrupt or trap can be allowed
* while they are in progress. Traps should not happen, but interrupts need to
* be explicitely disabled. critical_enter() cannot be used here, since it only
* disables soft interrupts.
* XXX: is something like this needed elsewhere, too?
*/
static __inline void
tlb_dtlb_context_primary_demap(void)
{
stxa(TLB_DEMAP_PRIMARY | TLB_DEMAP_CONTEXT, ASI_DMMU_DEMAP, 0);
membar(Sync);
}
static __inline void
tlb_dtlb_page_demap(struct pmap *pm, vm_offset_t va)
{
u_int ctx;
ctx = pm->pm_context[PCPU_GET(cpuid)];
if (ctx == TLB_CTX_KERNEL) {
stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE,
ASI_DMMU_DEMAP, 0);
membar(Sync);
} else if (ctx != -1) {
stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE,
ASI_DMMU_DEMAP, 0);
membar(Sync);
}
}
static __inline void
tlb_dtlb_store(vm_offset_t va, u_long ctx, struct tte tte)
{
u_long pst;
pst = intr_disable();
stxa(AA_DMMU_TAR, ASI_DMMU,
TLB_TAR_VA(va) | TLB_TAR_CTX(ctx));
stxa(0, ASI_DTLB_DATA_IN_REG, tte.tte_data);
membar(Sync);
intr_restore(pst);
}
static __inline void
tlb_dtlb_store_slot(vm_offset_t va, u_long ctx, struct tte tte, int slot)
{
u_long pst;
pst = intr_disable();
stxa(AA_DMMU_TAR, ASI_DMMU, TLB_TAR_VA(va) | TLB_TAR_CTX(ctx));
stxa(TLB_DAR_SLOT(slot), ASI_DTLB_DATA_ACCESS_REG, tte.tte_data);
membar(Sync);
intr_restore(pst);
}
static __inline void
tlb_itlb_context_primary_demap(void)
{
stxa(TLB_DEMAP_PRIMARY | TLB_DEMAP_CONTEXT, ASI_IMMU_DEMAP, 0);
membar(Sync);
}
static __inline void
tlb_itlb_page_demap(struct pmap *pm, vm_offset_t va)
{
u_int ctx;
ctx = pm->pm_context[PCPU_GET(cpuid)];
if (ctx == TLB_CTX_KERNEL) {
stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE,
ASI_IMMU_DEMAP, 0);
flush(KERNBASE);
} else if (ctx != -1) {
stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE,
ASI_IMMU_DEMAP, 0);
membar(Sync);
}
}
static __inline void
tlb_itlb_store(vm_offset_t va, u_long ctx, struct tte tte)
{
u_long pst;
pst = intr_disable();
stxa(AA_IMMU_TAR, ASI_IMMU, TLB_TAR_VA(va) | TLB_TAR_CTX(ctx));
stxa(0, ASI_ITLB_DATA_IN_REG, tte.tte_data);
if (ctx == TLB_CTX_KERNEL)
flush(va);
else {
/*
* flush probably not needed and impossible here, no access to
* user page.
*/
membar(Sync);
}
intr_restore(pst);
}
static __inline void
tlb_context_demap(struct pmap *pm)
{
u_int ctx;
void *cookie;
u_long s;
ctx = pm->pm_context[PCPU_GET(cpuid)];
if (ctx != -1) {
tlb_dtlb_context_primary_demap();
tlb_itlb_context_primary_demap();
cookie = ipi_tlb_context_demap(pm);
if (pm->pm_active & PCPU_GET(cpumask)) {
KASSERT(pm->pm_context[PCPU_GET(cpuid)] != -1,
("tlb_context_demap: inactive pmap?"));
s = intr_disable();
stxa(TLB_DEMAP_PRIMARY | TLB_DEMAP_CONTEXT, ASI_DMMU_DEMAP, 0);
stxa(TLB_DEMAP_PRIMARY | TLB_DEMAP_CONTEXT, ASI_IMMU_DEMAP, 0);
membar(Sync);
intr_restore(s);
}
}
static __inline void
tlb_itlb_store_slot(vm_offset_t va, u_long ctx, struct tte tte, int slot)
{
u_long pst;
pst = intr_disable();
stxa(AA_IMMU_TAR, ASI_IMMU, TLB_TAR_VA(va) | TLB_TAR_CTX(ctx));
stxa(TLB_DAR_SLOT(slot), ASI_ITLB_DATA_ACCESS_REG, tte.tte_data);
flush(va);
intr_restore(pst);
ipi_wait(cookie);
}
static __inline void
tlb_page_demap(u_int tlb, struct pmap *pm, vm_offset_t va)
{
if (tlb & TLB_DTLB)
tlb_dtlb_page_demap(pm, va);
if (tlb & TLB_ITLB)
tlb_itlb_page_demap(pm, va);
u_long flags;
void *cookie;
u_long s;
cookie = ipi_tlb_page_demap(tlb, pm, va);
if (pm->pm_active & PCPU_GET(cpumask)) {
KASSERT(pm->pm_context[PCPU_GET(cpuid)] != -1,
("tlb_page_demap: inactive pmap?"));
if (pm == kernel_pmap)
flags = TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE;
else
flags = TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE;
s = intr_disable();
if (tlb & TLB_DTLB) {
stxa(TLB_DEMAP_VA(va) | flags, ASI_DMMU_DEMAP, 0);
membar(Sync);
}
if (tlb & TLB_ITLB) {
stxa(TLB_DEMAP_VA(va) | flags, ASI_IMMU_DEMAP, 0);
membar(Sync);
}
intr_restore(s);
}
ipi_wait(cookie);
}
static __inline void
tlb_range_demap(struct pmap *pm, vm_offset_t start, vm_offset_t end)
{
for (; start < end; start += PAGE_SIZE)
tlb_page_demap(TLB_DTLB | TLB_ITLB, pm, start);
vm_offset_t va;
void *cookie;
u_long flags;
u_long s;
cookie = ipi_tlb_range_demap(pm, start, end);
if (pm->pm_active & PCPU_GET(cpumask)) {
KASSERT(pm->pm_context[PCPU_GET(cpuid)] != -1,
("tlb_range_demap: inactive pmap?"));
if (pm == kernel_pmap)
flags = TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE;
else
flags = TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE;
s = intr_disable();
for (va = start; va < end; va += PAGE_SIZE) {
stxa(TLB_DEMAP_VA(va) | flags, ASI_DMMU_DEMAP, 0);
stxa(TLB_DEMAP_VA(va) | flags, ASI_IMMU_DEMAP, 0);
membar(Sync);
}
intr_restore(s);
}
ipi_wait(cookie);
}
static __inline void
tlb_tte_demap(struct tte tte, struct pmap *pm)
{
tlb_page_demap(TD_GET_TLB(tte.tte_data), pm, TV_GET_VA(tte.tte_vpn));
}
static __inline void
tlb_store(u_int tlb, vm_offset_t va, u_long ctx, struct tte tte)
{
KASSERT(ctx != -1, ("tlb_store: invalid context"));
if (tlb & TLB_DTLB)
tlb_dtlb_store(va, ctx, tte);
if (tlb & TLB_ITLB)
tlb_itlb_store(va, ctx, tte);
}
static __inline void
tlb_store_slot(u_int tlb, vm_offset_t va, u_long ctx, struct tte tte, int slot)
{
KASSERT(ctx != -1, ("tlb_store_slot: invalid context"));
if (tlb & TLB_DTLB)
tlb_dtlb_store_slot(va, ctx, tte, slot);
if (tlb & TLB_ITLB)
tlb_itlb_store_slot(va, ctx, tte, slot);
}
#define tlb_tte_demap(tte, pm) \
tlb_page_demap(TD_GET_TLB((tte).tte_data), pm, \
TV_GET_VA((tte).tte_vpn));
#endif /* !_MACHINE_TLB_H_ */

View File

@ -113,6 +113,7 @@
#include <sys/bus.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/systm.h>
#include <vm/vm.h>
@ -125,6 +126,7 @@
#include <machine/bus.h>
#include <machine/cache.h>
#include <machine/pmap.h>
#include <machine/smp.h>
#include <machine/tlb.h>
/* ASI's for bus access. */

View File

@ -34,6 +34,7 @@
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/signal.h>
#include <sys/smp.h>
#include <sys/systm.h>
#include <sys/ucontext.h>
#include <sys/user.h>
@ -90,7 +91,7 @@ ASSYM(LSU_VW, LSU_VW);
ASSYM(TAR_VPN_SHIFT, TAR_VPN_SHIFT);
ASSYM(TLB_DEMAP_NUCLEUS, TLB_DEMAP_NUCLEUS);
ASSYM(TLB_DEMAP_SECONDARY, TLB_DEMAP_SECONDARY);
ASSYM(TLB_DEMAP_PRIMARY, TLB_DEMAP_PRIMARY);
ASSYM(TLB_DEMAP_CONTEXT, TLB_DEMAP_CONTEXT);
ASSYM(TLB_DEMAP_PAGE, TLB_DEMAP_PAGE);
@ -111,6 +112,7 @@ ASSYM(CPU_CLKSYNC, CPU_CLKSYNC);
ASSYM(CPU_INIT, CPU_INIT);
ASSYM(CSA_MID, offsetof(struct cpu_start_args, csa_mid));
ASSYM(CSA_PCPU, offsetof(struct cpu_start_args, csa_pcpu));
ASSYM(CSA_STATE, offsetof(struct cpu_start_args, csa_state));
ASSYM(CSA_TICK, offsetof(struct cpu_start_args, csa_tick));
ASSYM(CSA_VER, offsetof(struct cpu_start_args, csa_ver));
@ -179,7 +181,7 @@ ASSYM(IQE_ARG, offsetof(struct iqe, iqe_arg));
ASSYM(ILA_LEVEL, offsetof(struct ipi_level_args, ila_level));
ASSYM(ITA_TLB, offsetof(struct ipi_tlb_args, ita_tlb));
ASSYM(ITA_CTX, offsetof(struct ipi_tlb_args, ita_ctx));
ASSYM(ITA_PMAP, offsetof(struct ipi_tlb_args, ita_pmap));
ASSYM(ITA_START, offsetof(struct ipi_tlb_args, ita_start));
ASSYM(ITA_END, offsetof(struct ipi_tlb_args, ita_end));
ASSYM(ITA_VA, offsetof(struct ipi_tlb_args, ita_va));

View File

@ -67,6 +67,7 @@
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/timetc.h>
#include <sys/user.h>
#include <sys/ucontext.h>
#include <sys/user.h>
#include <sys/ucontext.h>
@ -93,6 +94,7 @@
#include <machine/md_var.h>
#include <machine/metadata.h>
#include <machine/ofw_machdep.h>
#include <machine/smp.h>
#include <machine/pmap.h>
#include <machine/pstate.h>
#include <machine/reg.h>

View File

@ -36,12 +36,15 @@
.register %g2, #ignore
.register %g3, #ignore
#if 0
#define IPI_WAIT(r1, r2, r3) \
ATOMIC_DEC_INT(r1, r2, r3) ; \
9: membar #StoreLoad ; \
lduw [r1], r2 ; \
9: lduw [r1], r2 ; \
brnz,a,pn r2, 9b ; \
nop
#else
#define IPI_WAIT(r1, r2, r3)
#endif
/*
* Trigger a softint at the desired level.
@ -76,36 +79,41 @@ END(tl_ipi_test)
* Demap a page from the dtlb and/or itlb.
*/
ENTRY(tl_ipi_tlb_page_demap)
#if KTR_COMPILE & KTR_SMP
CATR(KTR_SMP, "ipi_tlb_page_demap: pm=%p va=%#lx"
, %g1, %g2, %g3, 7, 8, 9)
ldx [%g5 + ITA_PMAP], %g2
stx %g2, [%g1 + KTR_PARM1]
ldx [%g5 + ITA_VA], %g2
stx %g2, [%g1 + KTR_PARM2]
9:
#endif
ldx [%g5 + ITA_PMAP], %g1
SET(kernel_pmap_store, %g3, %g2)
mov TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE, %g3
cmp %g1, %g2
movne %xcc, TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE, %g3
ldx [%g5 + ITA_TLB], %g1
ldx [%g5 + ITA_CTX], %g2
ldx [%g5 + ITA_VA], %g3
ldx [%g5 + ITA_VA], %g2
or %g2, %g3, %g2
wr %g0, ASI_DMMU, %asi
brz,a,pt %g2, 1f
or %g3, TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE, %g3
stxa %g2, [%g0 + AA_DMMU_SCXR] %asi
andcc %g1, TLB_DTLB, %g0
bz,a,pn %xcc, 1f
nop
stxa %g0, [%g2] ASI_DMMU_DEMAP
membar #Sync
or %g3, TLB_DEMAP_SECONDARY | TLB_DEMAP_PAGE, %g3
1: andcc %g1, TLB_DTLB, %g0
bz,a,pn %xcc, 2f
1: andcc %g1, TLB_ITLB, %g0
bz,a,pn %xcc, 2f
nop
stxa %g0, [%g3] ASI_DMMU_DEMAP
stxa %g0, [%g2] ASI_IMMU_DEMAP
membar #Sync
2: andcc %g1, TLB_ITLB, %g0
bz,a,pn %xcc, 3f
nop
stxa %g0, [%g3] ASI_IMMU_DEMAP
3: brz,a,pt %g2, 4f
nop
stxa %g0, [%g0 + AA_DMMU_SCXR] %asi
4: membar #Sync
IPI_WAIT(%g5, %g1, %g2)
2: IPI_WAIT(%g5, %g1, %g2)
retry
END(tl_ipi_tlb_page_demap)
@ -113,55 +121,62 @@ END(tl_ipi_tlb_page_demap)
* Demap a range of pages from the dtlb and itlb.
*/
ENTRY(tl_ipi_tlb_range_demap)
ldx [%g5 + ITA_CTX], %g1
#if KTR_COMPILE & KTR_SMP
CATR(KTR_SMP, "ipi_tlb_range_demap: pm=%p start=%#lx end=%#lx"
, %g1, %g2, %g3, 7, 8, 9)
ldx [%g5 + ITA_PMAP], %g2
stx %g2, [%g1 + KTR_PARM1]
ldx [%g5 + ITA_START], %g2
ldx [%g5 + ITA_END], %g3
stx %g2, [%g1 + KTR_PARM2]
ldx [%g5 + ITA_END], %g2
stx %g2, [%g1 + KTR_PARM3]
9:
#endif
wr %g0, ASI_DMMU, %asi
ldx [%g5 + ITA_PMAP], %g1
brz,a,pt %g1, 1f
mov TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE, %g4
SET(kernel_pmap_store, %g3, %g2)
mov TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE, %g3
stxa %g1, [%g0 + AA_DMMU_SCXR] %asi
membar #Sync
mov TLB_DEMAP_SECONDARY | TLB_DEMAP_PAGE, %g4
cmp %g1, %g2
movne %xcc, TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE, %g3
1: set PAGE_SIZE, %g5
ldx [%g5 + ITA_START], %g1
ldx [%g5 + ITA_END], %g2
2: or %g4, %g2, %g4
set PAGE_SIZE, %g6
1: or %g1, %g3, %g4
stxa %g0, [%g4] ASI_DMMU_DEMAP
stxa %g0, [%g4] ASI_IMMU_DEMAP
membar #Sync
add %g2, %g5, %g2
cmp %g2, %g3
bne,a,pt %xcc, 2b
add %g1, %g6, %g1
cmp %g1, %g2
blt,a,pt %xcc, 1b
nop
brz,a,pt %g1, 3f
nop
stxa %g0, [%g0 + AA_DMMU_SCXR] %asi
3: membar #Sync
IPI_WAIT(%g5, %g1, %g2)
retry
END(tl_ipi_tlb_range_demap)
/*
* Demap an entire context from the dtlb and itlb.
* Demap the primary context from the dtlb and itlb.
*/
ENTRY(tl_ipi_tlb_context_demap)
ldx [%g5 + ITA_CTX], %g1
#if KTR_COMPILE & KTR_SMP
CATR(KTR_SMP, "ipi_tlb_page_demap: pm=%p va=%#lx"
, %g1, %g2, %g3, 7, 8, 9)
ldx [%g5 + ITA_PMAP], %g2
stx %g2, [%g1 + KTR_PARM1]
ldx [%g5 + ITA_VA], %g2
stx %g2, [%g1 + KTR_PARM2]
9:
#endif
mov AA_DMMU_SCXR, %g2
stxa %g1, [%g2] ASI_DMMU
membar #Sync
mov TLB_DEMAP_SECONDARY | TLB_DEMAP_CONTEXT, %g3
stxa %g0, [%g3] ASI_DMMU_DEMAP
stxa %g0, [%g3] ASI_IMMU_DEMAP
stxa %g0, [%g2] ASI_DMMU
mov TLB_DEMAP_PRIMARY | TLB_DEMAP_CONTEXT, %g1
stxa %g0, [%g1] ASI_DMMU_DEMAP
stxa %g0, [%g1] ASI_IMMU_DEMAP
membar #Sync
IPI_WAIT(%g5, %g1, %g2)

View File

@ -36,12 +36,15 @@
.register %g2, #ignore
.register %g3, #ignore
#if 0
#define IPI_WAIT(r1, r2, r3) \
ATOMIC_DEC_INT(r1, r2, r3) ; \
9: membar #StoreLoad ; \
lduw [r1], r2 ; \
9: lduw [r1], r2 ; \
brnz,a,pn r2, 9b ; \
nop
#else
#define IPI_WAIT(r1, r2, r3)
#endif
/*
* Trigger a softint at the desired level.
@ -76,36 +79,41 @@ END(tl_ipi_test)
* Demap a page from the dtlb and/or itlb.
*/
ENTRY(tl_ipi_tlb_page_demap)
#if KTR_COMPILE & KTR_SMP
CATR(KTR_SMP, "ipi_tlb_page_demap: pm=%p va=%#lx"
, %g1, %g2, %g3, 7, 8, 9)
ldx [%g5 + ITA_PMAP], %g2
stx %g2, [%g1 + KTR_PARM1]
ldx [%g5 + ITA_VA], %g2
stx %g2, [%g1 + KTR_PARM2]
9:
#endif
ldx [%g5 + ITA_PMAP], %g1
SET(kernel_pmap_store, %g3, %g2)
mov TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE, %g3
cmp %g1, %g2
movne %xcc, TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE, %g3
ldx [%g5 + ITA_TLB], %g1
ldx [%g5 + ITA_CTX], %g2
ldx [%g5 + ITA_VA], %g3
ldx [%g5 + ITA_VA], %g2
or %g2, %g3, %g2
wr %g0, ASI_DMMU, %asi
brz,a,pt %g2, 1f
or %g3, TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE, %g3
stxa %g2, [%g0 + AA_DMMU_SCXR] %asi
andcc %g1, TLB_DTLB, %g0
bz,a,pn %xcc, 1f
nop
stxa %g0, [%g2] ASI_DMMU_DEMAP
membar #Sync
or %g3, TLB_DEMAP_SECONDARY | TLB_DEMAP_PAGE, %g3
1: andcc %g1, TLB_DTLB, %g0
bz,a,pn %xcc, 2f
1: andcc %g1, TLB_ITLB, %g0
bz,a,pn %xcc, 2f
nop
stxa %g0, [%g3] ASI_DMMU_DEMAP
stxa %g0, [%g2] ASI_IMMU_DEMAP
membar #Sync
2: andcc %g1, TLB_ITLB, %g0
bz,a,pn %xcc, 3f
nop
stxa %g0, [%g3] ASI_IMMU_DEMAP
3: brz,a,pt %g2, 4f
nop
stxa %g0, [%g0 + AA_DMMU_SCXR] %asi
4: membar #Sync
IPI_WAIT(%g5, %g1, %g2)
2: IPI_WAIT(%g5, %g1, %g2)
retry
END(tl_ipi_tlb_page_demap)
@ -113,55 +121,62 @@ END(tl_ipi_tlb_page_demap)
* Demap a range of pages from the dtlb and itlb.
*/
ENTRY(tl_ipi_tlb_range_demap)
ldx [%g5 + ITA_CTX], %g1
#if KTR_COMPILE & KTR_SMP
CATR(KTR_SMP, "ipi_tlb_range_demap: pm=%p start=%#lx end=%#lx"
, %g1, %g2, %g3, 7, 8, 9)
ldx [%g5 + ITA_PMAP], %g2
stx %g2, [%g1 + KTR_PARM1]
ldx [%g5 + ITA_START], %g2
ldx [%g5 + ITA_END], %g3
stx %g2, [%g1 + KTR_PARM2]
ldx [%g5 + ITA_END], %g2
stx %g2, [%g1 + KTR_PARM3]
9:
#endif
wr %g0, ASI_DMMU, %asi
ldx [%g5 + ITA_PMAP], %g1
brz,a,pt %g1, 1f
mov TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE, %g4
SET(kernel_pmap_store, %g3, %g2)
mov TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE, %g3
stxa %g1, [%g0 + AA_DMMU_SCXR] %asi
membar #Sync
mov TLB_DEMAP_SECONDARY | TLB_DEMAP_PAGE, %g4
cmp %g1, %g2
movne %xcc, TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE, %g3
1: set PAGE_SIZE, %g5
ldx [%g5 + ITA_START], %g1
ldx [%g5 + ITA_END], %g2
2: or %g4, %g2, %g4
set PAGE_SIZE, %g6
1: or %g1, %g3, %g4
stxa %g0, [%g4] ASI_DMMU_DEMAP
stxa %g0, [%g4] ASI_IMMU_DEMAP
membar #Sync
add %g2, %g5, %g2
cmp %g2, %g3
bne,a,pt %xcc, 2b
add %g1, %g6, %g1
cmp %g1, %g2
blt,a,pt %xcc, 1b
nop
brz,a,pt %g1, 3f
nop
stxa %g0, [%g0 + AA_DMMU_SCXR] %asi
3: membar #Sync
IPI_WAIT(%g5, %g1, %g2)
retry
END(tl_ipi_tlb_range_demap)
/*
* Demap an entire context from the dtlb and itlb.
* Demap the primary context from the dtlb and itlb.
*/
ENTRY(tl_ipi_tlb_context_demap)
ldx [%g5 + ITA_CTX], %g1
#if KTR_COMPILE & KTR_SMP
CATR(KTR_SMP, "ipi_tlb_page_demap: pm=%p va=%#lx"
, %g1, %g2, %g3, 7, 8, 9)
ldx [%g5 + ITA_PMAP], %g2
stx %g2, [%g1 + KTR_PARM1]
ldx [%g5 + ITA_VA], %g2
stx %g2, [%g1 + KTR_PARM2]
9:
#endif
mov AA_DMMU_SCXR, %g2
stxa %g1, [%g2] ASI_DMMU
membar #Sync
mov TLB_DEMAP_SECONDARY | TLB_DEMAP_CONTEXT, %g3
stxa %g0, [%g3] ASI_DMMU_DEMAP
stxa %g0, [%g3] ASI_IMMU_DEMAP
stxa %g0, [%g2] ASI_DMMU
mov TLB_DEMAP_PRIMARY | TLB_DEMAP_CONTEXT, %g1
stxa %g0, [%g1] ASI_DMMU_DEMAP
stxa %g0, [%g1] ASI_IMMU_DEMAP
membar #Sync
IPI_WAIT(%g5, %g1, %g2)

View File

@ -120,9 +120,9 @@ ENTRY(mp_startup)
/*
* Wait till its our turn to bootstrap.
*/
1: lduw [%l0 + CSA_MID], %l1
2: lduw [%l0 + CSA_MID], %l1
cmp %l1, %o0
bne %xcc, 1b
bne %xcc, 2b
nop
#if KTR_COMPILE & KTR_SMP
@ -132,25 +132,34 @@ ENTRY(mp_startup)
9:
#endif
/*
* Find our per-cpu page and the tte data that we will use to map it.
*/
ldx [%l0 + CSA_TTES + TTE_VPN], %l1
ldx [%l0 + CSA_TTES + TTE_DATA], %l2
add %l0, CSA_TTES, %l1
clr %l2
/*
* Map the per-cpu page. It uses a locked tlb entry.
* Map the per-cpu pages.
*/
3: sllx %l2, TTE_SHIFT, %l3
add %l1, %l3, %l3
ldx [%l3 + TTE_VPN], %l4
ldx [%l3 + TTE_DATA], %l5
wr %g0, ASI_DMMU, %asi
sllx %l1, PAGE_SHIFT, %l1
stxa %l1, [%g0 + AA_DMMU_TAR] %asi
stxa %l2, [%g0] ASI_DTLB_DATA_IN_REG
sllx %l4, PAGE_SHIFT, %l4
stxa %l4, [%g0 + AA_DMMU_TAR] %asi
stxa %l5, [%g0] ASI_DTLB_DATA_IN_REG
membar #Sync
add %l2, 1, %l2
cmp %l2, PCPU_PAGES
bne %xcc, 3b
nop
/*
* Get onto our per-cpu panic stack, which precedes the struct pcpu
* in the per-cpu page.
*/
ldx [%l0 + CSA_PCPU], %l1
set PCPU_PAGES * PAGE_SIZE - PC_SIZEOF, %l2
add %l1, %l2, %l1
sub %l1, SPOFF + CCFSZ, %sp
@ -164,12 +173,11 @@ ENTRY(mp_startup)
CATR(KTR_SMP,
"_mp_start: bootstrap cpuid=%d mid=%d pcpu=%#lx data=%#lx sp=%#lx"
, %g1, %g2, %g3, 7, 8, 9)
lduw [%l2 + PC_CPUID], %g2
lduw [%l1 + PC_CPUID], %g2
stx %g2, [%g1 + KTR_PARM1]
lduw [%l2 + PC_MID], %g2
lduw [%l1 + PC_MID], %g2
stx %g2, [%g1 + KTR_PARM2]
stx %l2, [%g1 + KTR_PARM3]
stx %l1, [%g1 + KTR_PARM4]
stx %l1, [%g1 + KTR_PARM3]
stx %sp, [%g1 + KTR_PARM5]
9:
#endif

View File

@ -120,9 +120,9 @@ ENTRY(mp_startup)
/*
* Wait till its our turn to bootstrap.
*/
1: lduw [%l0 + CSA_MID], %l1
2: lduw [%l0 + CSA_MID], %l1
cmp %l1, %o0
bne %xcc, 1b
bne %xcc, 2b
nop
#if KTR_COMPILE & KTR_SMP
@ -132,25 +132,34 @@ ENTRY(mp_startup)
9:
#endif
/*
* Find our per-cpu page and the tte data that we will use to map it.
*/
ldx [%l0 + CSA_TTES + TTE_VPN], %l1
ldx [%l0 + CSA_TTES + TTE_DATA], %l2
add %l0, CSA_TTES, %l1
clr %l2
/*
* Map the per-cpu page. It uses a locked tlb entry.
* Map the per-cpu pages.
*/
3: sllx %l2, TTE_SHIFT, %l3
add %l1, %l3, %l3
ldx [%l3 + TTE_VPN], %l4
ldx [%l3 + TTE_DATA], %l5
wr %g0, ASI_DMMU, %asi
sllx %l1, PAGE_SHIFT, %l1
stxa %l1, [%g0 + AA_DMMU_TAR] %asi
stxa %l2, [%g0] ASI_DTLB_DATA_IN_REG
sllx %l4, PAGE_SHIFT, %l4
stxa %l4, [%g0 + AA_DMMU_TAR] %asi
stxa %l5, [%g0] ASI_DTLB_DATA_IN_REG
membar #Sync
add %l2, 1, %l2
cmp %l2, PCPU_PAGES
bne %xcc, 3b
nop
/*
* Get onto our per-cpu panic stack, which precedes the struct pcpu
* in the per-cpu page.
*/
ldx [%l0 + CSA_PCPU], %l1
set PCPU_PAGES * PAGE_SIZE - PC_SIZEOF, %l2
add %l1, %l2, %l1
sub %l1, SPOFF + CCFSZ, %sp
@ -164,12 +173,11 @@ ENTRY(mp_startup)
CATR(KTR_SMP,
"_mp_start: bootstrap cpuid=%d mid=%d pcpu=%#lx data=%#lx sp=%#lx"
, %g1, %g2, %g3, 7, 8, 9)
lduw [%l2 + PC_CPUID], %g2
lduw [%l1 + PC_CPUID], %g2
stx %g2, [%g1 + KTR_PARM1]
lduw [%l2 + PC_MID], %g2
lduw [%l1 + PC_MID], %g2
stx %g2, [%g1 + KTR_PARM2]
stx %l2, [%g1 + KTR_PARM3]
stx %l1, [%g1 + KTR_PARM4]
stx %l1, [%g1 + KTR_PARM3]
stx %sp, [%g1 + KTR_PARM5]
9:
#endif

View File

@ -89,7 +89,9 @@ static ih_func_t cpu_ipi_stop;
* since the other processors will use it before the boot cpu enters the
* kernel.
*/
struct cpu_start_args cpu_start_args = { -1, -1, 0, 0 };
struct cpu_start_args cpu_start_args = { 0, -1, -1, 0, 0 };
struct ipi_tlb_args ipi_tlb_args;
struct ipi_level_args ipi_level_args;
vm_offset_t mp_tramp;
@ -252,9 +254,10 @@ cpu_mp_unleash(void *v)
u_long s;
int i;
ctx_min = 1;
ctx_inc = (8192 - 1) / mp_ncpus;
ctx_min = TLB_CTX_USER_MIN;
ctx_inc = (TLB_CTX_USER_MAX - 1) / mp_ncpus;
csa = &cpu_start_args;
csa->csa_count = mp_ncpus;
SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
pc->pc_tlb_ctx = ctx_min;
pc->pc_tlb_ctx_min = ctx_min;
@ -279,12 +282,18 @@ cpu_mp_unleash(void *v)
TD_L | TD_CP | TD_CV | TD_P | TD_W;
}
csa->csa_state = 0;
csa->csa_pcpu = pc->pc_addr;
csa->csa_mid = pc->pc_mid;
s = intr_disable();
while (csa->csa_state != CPU_BOOTSTRAP)
;
intr_restore(s);
cpu_ipi_send(pc->pc_mid, 0, (u_long)tl_ipi_test, 0);
}
membar(StoreLoad);
csa->csa_count = 0;
}
void
@ -300,8 +309,10 @@ cpu_mp_bootstrap(struct pcpu *pc)
PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid)));
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
csa->csa_count--;
membar(StoreLoad);
csa->csa_state = CPU_BOOTSTRAP;
for (;;)
while (csa->csa_count != 0)
;
binuptime(PCPU_PTR(switchtime));
@ -340,15 +351,13 @@ cpu_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
void
cpu_ipi_send(u_int mid, u_long d0, u_long d1, u_long d2)
{
u_long pstate;
u_long s;
int i;
KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) & IDR_BUSY) == 0,
("ipi_send: outstanding dispatch"));
pstate = rdpr(pstate);
for (i = 0; i < IPI_RETRIES; i++) {
if (pstate & PSTATE_IE)
wrpr(pstate, pstate, PSTATE_IE);
s = intr_disable();
stxa(AA_SDB_INTR_D0, ASI_SDB_INTR_W, d0);
stxa(AA_SDB_INTR_D1, ASI_SDB_INTR_W, d1);
stxa(AA_SDB_INTR_D2, ASI_SDB_INTR_W, d2);
@ -356,7 +365,7 @@ cpu_ipi_send(u_int mid, u_long d0, u_long d1, u_long d2)
membar(Sync);
while (ldxa(0, ASI_INTR_DISPATCH_STATUS) & IDR_BUSY)
;
wrpr(pstate, pstate, 0);
intr_restore(s);
if ((ldxa(0, ASI_INTR_DISPATCH_STATUS) & IDR_NACK) == 0)
return;
}

View File

@ -73,6 +73,7 @@
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>
@ -94,6 +95,7 @@
#include <machine/frame.h>
#include <machine/md_var.h>
#include <machine/pv.h>
#include <machine/smp.h>
#include <machine/tlb.h>
#include <machine/tte.h>
#include <machine/tsb.h>
@ -409,22 +411,28 @@ pmap_bootstrap(vm_offset_t ekva)
void
pmap_map_tsb(void)
{
struct tte tte;
vm_offset_t va;
vm_offset_t pa;
u_long data;
u_int slot;
u_long s;
int i;
s = intr_disable();
/*
* Map the 4mb tsb pages.
*/
for (i = 0; i < KVA_PAGES; i++) {
slot = TLB_SLOT_TSB_KERNEL_MIN;
for (i = 0; i < KVA_PAGES; i++, slot++) {
va = (vm_offset_t)tsb_kernel + i * PAGE_SIZE_4M;
pa = tsb_kernel_phys + i * PAGE_SIZE_4M;
tte.tte_vpn = TV_VPN(va);
tte.tte_data = TD_V | TD_4M | TD_PA(pa) | TD_L | TD_CP |
TD_CV | TD_P | TD_W;
tlb_store_slot(TLB_DTLB, va, TLB_CTX_KERNEL, tte,
TLB_SLOT_TSB_KERNEL_MIN + i);
data = TD_V | TD_4M | TD_PA(pa) | TD_L | TD_CP | TD_CV |
TD_P | TD_W;
stxa(AA_DMMU_TAR, ASI_DMMU, TLB_TAR_VA(va) |
TLB_TAR_CTX(TLB_CTX_KERNEL));
stxa(TLB_DAR_SLOT(slot), ASI_DTLB_DATA_IN_REG, data);
membar(Sync);
}
/*
@ -441,6 +449,8 @@ pmap_map_tsb(void)
*/
stxa(AA_DMMU_SCXR, ASI_DMMU, TLB_CTX_KERNEL);
membar(Sync);
intr_restore(s);
}
/*

View File

@ -41,6 +41,7 @@
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@ -54,6 +55,7 @@
#include <machine/frame.h>
#include <machine/pmap.h>
#include <machine/pv.h>
#include <machine/smp.h>
#include <machine/tte.h>
#include <machine/tlb.h>
#include <machine/tsb.h>

View File

@ -52,6 +52,7 @@
#include <sys/systm.h>
#include <sys/pioctl.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/syscall.h>
#include <sys/sysent.h>
#include <sys/user.h>
@ -71,6 +72,7 @@
#include <machine/intr_machdep.h>
#include <machine/pcb.h>
#include <machine/pv.h>
#include <machine/smp.h>
#include <machine/trap.h>
#include <machine/tstate.h>
#include <machine/tte.h>

View File

@ -39,6 +39,7 @@
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
@ -58,6 +59,7 @@
#include <machine/trap.h>
#include <machine/pmap.h>
#include <machine/pv.h>
#include <machine/smp.h>
#include <machine/tlb.h>
#include <machine/tsb.h>
#include <machine/tte.h>