- USIII-based machines can consist of CPUs having different cache
  sizes (and running at different frequencies), so move the cacheinfo
  to the PCPU data. While at it, remove some redundant and/or unused
  members from struct cacheinfo.
- In sparc64_init don't assume the first CPU node we find in the OFW
  device tree is the BSP.
marius 2008-09-02 21:13:54 +00:00
parent 6a6f30db5a
commit 6b0f6beaa5
9 changed files with 85 additions and 67 deletions
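
To make the first change concrete: cache geometry used to be read from a single global struct cacheinfo cache and is now carried per CPU in pc_cache (see the pcpu.h and cache.h hunks below), with consumers going through the PCPU accessors. The following is a minimal sketch of the resulting access pattern, not code from this commit; the function name is hypothetical and the header list is approximate, but PCPU_GET(cache.dc_linesize), stxa_sync() and ASI_DCACHE_INVALIDATE are used exactly as in the cheetah.c hunk:

    #include <sys/param.h>
    #include <sys/pcpu.h>

    #include <machine/asi.h>
    #include <machine/cache.h>
    #include <machine/cpufunc.h>

    /*
     * Hypothetical example: invalidate the D$ lines backing one physical
     * page on the current CPU.  PCPU_GET() reads the running CPU's own
     * copy of the cache parameters, so on a USIII machine with mixed
     * CPUs each processor steps by its own line size rather than by a
     * single boot-time global value.
     */
    static void
    example_dcache_page_inval(vm_paddr_t pa)
    {
    	vm_paddr_t va;

    	for (va = pa; va < pa + PAGE_SIZE;
    	    va += PCPU_GET(cache.dc_linesize))
    		stxa_sync(va, ASI_DCACHE_INVALIDATE, 0);
    }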

sys/sparc64/include/cache.h View File

@@ -45,10 +45,6 @@
 #ifndef _MACHINE_CACHE_H_
 #define	_MACHINE_CACHE_H_
 
-#ifndef LOCORE
-#include <dev/ofw/openfirm.h>
-#endif
-
 #define	DCACHE_COLOR_BITS	(1)
 #define	DCACHE_COLORS		(1 << DCACHE_COLOR_BITS)
 #define	DCACHE_COLOR_MASK	(DCACHE_COLORS - 1)
@@ -80,31 +76,27 @@
  * Cache control information
  */
 struct cacheinfo {
-	u_int	c_enabled;		/* true => cache is enabled */
 	u_int	ic_size;		/* instruction cache */
-	u_int	ic_set;
-	u_int	ic_l2set;
 	u_int	ic_assoc;
 	u_int	ic_linesize;
 	u_int	dc_size;		/* data cache */
-	u_int	dc_l2size;
 	u_int	dc_assoc;
 	u_int	dc_linesize;
 	u_int	ec_size;		/* external cache info */
 	u_int	ec_assoc;
-	u_int	ec_l2set;
 	u_int	ec_linesize;
 	u_int	ec_l2linesize;
 };
 
 #ifdef _KERNEL
 
+struct pcpu;
+
 typedef void cache_enable_t(void);
 typedef void cache_flush_t(void);
 typedef void dcache_page_inval_t(vm_paddr_t pa);
 typedef void icache_page_inval_t(vm_paddr_t pa);
 
-void cache_init(phandle_t node);
+void cache_init(struct pcpu *pcpu);
 
 cache_enable_t cheetah_cache_enable;
 cache_flush_t cheetah_cache_flush;
@@ -121,8 +113,6 @@ extern cache_flush_t *cache_flush;
 extern dcache_page_inval_t *dcache_page_inval;
 extern icache_page_inval_t *icache_page_inval;
 
-extern struct cacheinfo cache;
-
 #endif /* KERNEL */
 
 #endif /* !LOCORE */

sys/sparc64/include/pcpu.h View File

@@ -31,6 +31,7 @@
 #define	_MACHINE_PCPU_H_
 
 #include <machine/asmacros.h>
+#include <machine/cache.h>
 #include <machine/frame.h>
 #include <machine/intr_machdep.h>
@@ -43,6 +44,7 @@ struct pmap;
  * point at the globaldata structure.
  */
 #define	PCPU_MD_FIELDS \
+	struct cacheinfo pc_cache; \
 	struct intr_request pc_irpool[IR_FREE]; \
 	struct intr_request *pc_irhead; \
 	struct intr_request **pc_irtail; \

sys/sparc64/sparc64/cache.c View File

@@ -74,6 +74,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/pcpu.h>
 
 #include <dev/ofw/openfirm.h>
@@ -81,8 +82,6 @@ __FBSDID("$FreeBSD$");
 #include <machine/tlb.h>
 #include <machine/ver.h>
 
-struct cacheinfo cache;
-
 cache_enable_t *cache_enable;
 cache_flush_t *cache_flush;
 dcache_page_inval_t *dcache_page_inval;
@@ -94,33 +93,41 @@ icache_page_inval_t *icache_page_inval;
  * Fill in the cache parameters using the cpu node.
 */
 void
-cache_init(phandle_t node)
+cache_init(struct pcpu *pcpu)
 {
 	u_long set;
 
-	if (OF_GET(node, "icache-size", cache.ic_size) == -1 ||
-	    OF_GET(node, "icache-line-size", cache.ic_linesize) == -1 ||
-	    OF_GET(node, "icache-associativity", cache.ic_assoc) == -1 ||
-	    OF_GET(node, "dcache-size", cache.dc_size) == -1 ||
-	    OF_GET(node, "dcache-line-size", cache.dc_linesize) == -1 ||
-	    OF_GET(node, "dcache-associativity", cache.dc_assoc) == -1 ||
-	    OF_GET(node, "ecache-size", cache.ec_size) == -1 ||
-	    OF_GET(node, "ecache-line-size", cache.ec_linesize) == -1 ||
-	    OF_GET(node, "ecache-associativity", cache.ec_assoc) == -1)
+	if (OF_GET(pcpu->pc_node, "icache-size",
+	    pcpu->pc_cache.ic_size) == -1 ||
+	    OF_GET(pcpu->pc_node, "icache-line-size",
+	    pcpu->pc_cache.ic_linesize) == -1 ||
+	    OF_GET(pcpu->pc_node, "icache-associativity",
+	    pcpu->pc_cache.ic_assoc) == -1 ||
+	    OF_GET(pcpu->pc_node, "dcache-size",
+	    pcpu->pc_cache.dc_size) == -1 ||
+	    OF_GET(pcpu->pc_node, "dcache-line-size",
+	    pcpu->pc_cache.dc_linesize) == -1 ||
+	    OF_GET(pcpu->pc_node, "dcache-associativity",
+	    pcpu->pc_cache.dc_assoc) == -1 ||
+	    OF_GET(pcpu->pc_node, "ecache-size",
+	    pcpu->pc_cache.ec_size) == -1 ||
+	    OF_GET(pcpu->pc_node, "ecache-line-size",
+	    pcpu->pc_cache.ec_linesize) == -1 ||
+	    OF_GET(pcpu->pc_node, "ecache-associativity",
+	    pcpu->pc_cache.ec_assoc) == -1)
 		panic("cache_init: could not retrieve cache parameters");
 
-	cache.ic_set = cache.ic_size / cache.ic_assoc;
-	cache.ic_l2set = ffs(cache.ic_set) - 1;
-	if ((cache.ic_set & ~(1UL << cache.ic_l2set)) != 0)
+	set = pcpu->pc_cache.ic_size / pcpu->pc_cache.ic_assoc;
+	if ((set & ~(1UL << (ffs(set) - 1))) != 0)
 		panic("cache_init: I$ set size not a power of 2");
-	cache.dc_l2size = ffs(cache.dc_size) - 1;
-	if ((cache.dc_size & ~(1UL << cache.dc_l2size)) != 0)
+	if ((pcpu->pc_cache.dc_size &
+	    ~(1UL << (ffs(pcpu->pc_cache.dc_size) - 1))) != 0)
 		panic("cache_init: D$ size not a power of 2");
-	if (((cache.dc_size / cache.dc_assoc) / PAGE_SIZE) != DCACHE_COLORS)
+	if (((pcpu->pc_cache.dc_size / pcpu->pc_cache.dc_assoc) /
+	    PAGE_SIZE) != DCACHE_COLORS)
 		panic("cache_init: too many D$ colors");
-	set = cache.ec_size / cache.ec_assoc;
-	cache.ec_l2set = ffs(set) - 1;
-	if ((set & ~(1UL << cache.ec_l2set)) != 0)
+	set = pcpu->pc_cache.ec_size / pcpu->pc_cache.ec_assoc;
+	if ((set & ~(1UL << (ffs(set) - 1))) != 0)
 		panic("cache_init: E$ set size not a power of 2");
 	if (cpu_impl >= CPU_IMPL_ULTRASPARCIII) {
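
A note on the repeated (x & ~(1UL << (ffs(x) - 1))) != 0 idiom introduced above: ffs() returns the 1-based position of the least significant set bit, so the expression clears exactly that bit; the result is zero if and only if x had a single bit set, i.e. was a power of two. A standalone userland illustration, not part of the commit:

    #include <assert.h>
    #include <strings.h>	/* ffs() */

    int
    main(void)
    {
    	unsigned int set;

    	/* Exactly one bit set: clearing the lowest set bit leaves 0. */
    	set = 1U << 13;				/* 8192 */
    	assert((set & ~(1U << (ffs(set) - 1))) == 0);

    	/* Two bits set: clearing bit 12 still leaves bit 13 standing. */
    	set = (1U << 13) | (1U << 12);		/* 12288 */
    	assert((set & ~(1U << (ffs(set) - 1))) != 0);

    	return (0);
    }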

sys/sparc64/sparc64/cheetah.c View File

@@ -72,7 +72,7 @@ cheetah_dcache_page_inval(vm_paddr_t spa)
 	KASSERT((spa & PAGE_MASK) == 0, ("%s: pa not page aligned", __func__));
 	cookie = ipi_dcache_page_inval(tl_ipi_cheetah_dcache_page_inval, spa);
-	for (pa = spa; pa < spa + PAGE_SIZE; pa += cache.dc_linesize)
+	for (pa = spa; pa < spa + PAGE_SIZE; pa += PCPU_GET(cache.dc_linesize))
 		stxa_sync(pa, ASI_DCACHE_INVALIDATE, 0);
 	ipi_wait(cookie);
 }

sys/sparc64/sparc64/genassym.c View File

@@ -188,11 +188,12 @@ ASSYM(PM_TSB_MISS_COUNT, offsetof(struct pmap, pm_tsb_miss_count));
 ASSYM(PM_TSB_CAP_MISS_COUNT, offsetof(struct pmap, pm_tsb_cap_miss_count));
 #endif
 
 #ifdef SUN4U
+ASSYM(PC_CACHE, offsetof(struct pcpu, pc_cache));
 ASSYM(PC_MID, offsetof(struct pcpu, pc_mid));
+ASSYM(PC_PMAP, offsetof(struct pcpu, pc_pmap));
 ASSYM(PC_TLB_CTX, offsetof(struct pcpu, pc_tlb_ctx));
 ASSYM(PC_TLB_CTX_MAX, offsetof(struct pcpu, pc_tlb_ctx_max));
 ASSYM(PC_TLB_CTX_MIN, offsetof(struct pcpu, pc_tlb_ctx_min));
-ASSYM(PC_PMAP, offsetof(struct pcpu, pc_pmap));
 #endif
 
 ASSYM(IR_NEXT, offsetof(struct intr_request, ir_next));
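
Two remarks on this hunk: the new PC_CACHE offset is what lets the IPI handlers in mp_exception.S (further down) reach the per-CPU cacheinfo from assembly, as in "lduw [PCPU(CACHE) + DC_SIZE], %g3"; the apparent move of PC_PMAP is just a re-sort of the SUN4U ASSYM entries into alphabetical order.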

sys/sparc64/sparc64/machdep.c View File

@@ -279,6 +279,7 @@ sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec)
 	phandle_t child;
 	phandle_t root;
 	u_int clock;
+	uint32_t portid;
 
 	end = 0;
 	kmdp = NULL;
@@ -314,12 +315,40 @@
 	init_param1();
 
+	/*
+	 * Prime our per-CPU data page for use.  Note, we are using it for
+	 * our stack, so don't pass the real size (PAGE_SIZE) to pcpu_init
+	 * or it'll zero it out from under us.
+	 */
+	pc = (struct pcpu *)(pcpu0 + (PCPU_PAGES * PAGE_SIZE)) - 1;
+	pcpu_init(pc, 0, sizeof(struct pcpu));
+	pc->pc_addr = (vm_offset_t)pcpu0;
+	pc->pc_mid = UPA_CR_GET_MID(ldxa(0, ASI_UPA_CONFIG_REG));
+	pc->pc_tlb_ctx = TLB_CTX_USER_MIN;
+	pc->pc_tlb_ctx_min = TLB_CTX_USER_MIN;
+	pc->pc_tlb_ctx_max = TLB_CTX_USER_MAX;
+
+	/*
+	 * Determine the OFW node (and ensure the
+	 * BSP is in the device tree in the first place).
+	 */
+	pc->pc_node = 0;
 	root = OF_peer(0);
 	for (child = OF_child(root); child != 0; child = OF_peer(child)) {
-		OF_getprop(child, "device_type", type, sizeof(type));
-		if (strcmp(type, "cpu") == 0)
+		if (OF_getprop(child, "device_type", type, sizeof(type)) <= 0)
+			continue;
+		if (strcmp(type, "cpu") != 0)
+			continue;
+		if (OF_getprop(child, cpu_impl < CPU_IMPL_ULTRASPARCIII ?
+		    "upa-portid" : "portid", &portid, sizeof(portid)) <= 0)
+			continue;
+		if (portid == pc->pc_mid) {
+			pc->pc_node = child;
 			break;
+		}
 	}
+	if (pc->pc_node == 0)
+		OF_exit();
 
 	/*
 	 * Initialize the tick counter.  Must be before the console is inited
@@ -353,8 +382,8 @@
 		end = (vm_offset_t)_end;
 	}
 
-	cache_init(child);
-	uma_set_align(cache.dc_linesize - 1);
+	cache_init(pc);
+	uma_set_align(pc->pc_cache.dc_linesize - 1);
 
 	cpu_block_copy = bcopy;
 	cpu_block_zero = bzero;
@@ -397,7 +426,7 @@
 	intr_init1();
 
 	/*
-	 * Initialize proc0 stuff (p_contested needs to be done early).
+	 * Initialize proc0, set kstack0, frame0, curthread and curpcb.
 	 */
 	proc_linkup0(&proc0, &thread0);
 	proc0.p_md.md_sigtramp = NULL;
@@ -407,22 +436,8 @@
 	    (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
 	frame0.tf_tstate = TSTATE_IE | TSTATE_PEF | TSTATE_PRIV;
 	thread0.td_frame = &frame0;
-
-	/*
-	 * Prime our per-cpu data page for use.  Note, we are using it for our
-	 * stack, so don't pass the real size (PAGE_SIZE) to pcpu_init or
-	 * it'll zero it out from under us.
-	 */
-	pc = (struct pcpu *)(pcpu0 + (PCPU_PAGES * PAGE_SIZE)) - 1;
-	pcpu_init(pc, 0, sizeof(struct pcpu));
 	pc->pc_curthread = &thread0;
 	pc->pc_curpcb = thread0.td_pcb;
-	pc->pc_mid = UPA_CR_GET_MID(ldxa(0, ASI_UPA_CONFIG_REG));
-	pc->pc_addr = (vm_offset_t)pcpu0;
-	pc->pc_node = child;
-	pc->pc_tlb_ctx = TLB_CTX_USER_MIN;
-	pc->pc_tlb_ctx_min = TLB_CTX_USER_MIN;
-	pc->pc_tlb_ctx_max = TLB_CTX_USER_MAX;
 
 	/*
 	 * Initialize global registers.
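
The machdep.c hunks above implement the second bullet of the commit message: rather than taking the first node of device_type "cpu", sparc64_init now matches each CPU node's port ID against the module ID the boot processor read from its UPA configuration register, and calls OF_exit() if nothing matches. Factored into a standalone helper, the lookup would read roughly as follows; the helper name is hypothetical, while the property choice mirrors the diff (CPUs before UltraSPARC III expose "upa-portid", USIII and later "portid"):

    #include <sys/param.h>
    #include <sys/systm.h>		/* strcmp() */

    #include <dev/ofw/openfirm.h>

    #include <machine/ver.h>	/* cpu_impl, CPU_IMPL_ULTRASPARCIII */

    /*
     * Hypothetical helper: return the OFW node of the CPU whose port ID
     * equals mid (the BSP's module ID), or 0 if no such node exists.
     */
    static phandle_t
    find_bsp_node(phandle_t root, uint32_t mid)
    {
    	char type[32];
    	phandle_t child;
    	uint32_t portid;

    	for (child = OF_child(root); child != 0; child = OF_peer(child)) {
    		if (OF_getprop(child, "device_type", type,
    		    sizeof(type)) <= 0)
    			continue;
    		if (strcmp(type, "cpu") != 0)
    			continue;
    		if (OF_getprop(child, cpu_impl < CPU_IMPL_ULTRASPARCIII ?
    		    "upa-portid" : "portid", &portid, sizeof(portid)) <= 0)
    			continue;
    		if (portid == mid)
    			return (child);
    	}
    	return (0);
    }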

sys/sparc64/sparc64/mp_exception.S View File

@@ -57,9 +57,8 @@ ENTRY(tl_ipi_spitfire_dcache_page_inval)
 	ldx	[%g5 + ICA_PA], %g6
 	srlx	%g6, PAGE_SHIFT - DC_TAG_SHIFT, %g6
-	SET(cache, %g3, %g2)
-	lduw	[%g2 + DC_SIZE], %g3
-	lduw	[%g2 + DC_LINESIZE], %g4
+	lduw	[PCPU(CACHE) + DC_SIZE], %g3
+	lduw	[PCPU(CACHE) + DC_LINESIZE], %g4
 	sub	%g3, %g4, %g2
 
 1:	ldxa	[%g2] ASI_DCACHE_TAG, %g1
@@ -98,9 +97,8 @@ ENTRY(tl_ipi_spitfire_icache_page_inval)
 	ldx	[%g5 + ICA_PA], %g6
 	srlx	%g6, PAGE_SHIFT - IC_TAG_SHIFT, %g6
-	SET(cache, %g3, %g2)
-	lduw	[%g2 + IC_SIZE], %g3
-	lduw	[%g2 + IC_LINESIZE], %g4
+	lduw	[PCPU(CACHE) + IC_SIZE], %g3
+	lduw	[PCPU(CACHE) + IC_LINESIZE], %g4
 	sub	%g3, %g4, %g2
 
 1:	ldda	[%g2] ASI_ICACHE_TAG, %g0 /*, %g1 */
@@ -140,8 +138,7 @@ ENTRY(tl_ipi_cheetah_dcache_page_inval)
 	set	PAGE_SIZE, %g2
 	add	%g1, %g2, %g3
-	SET(cache, %g4, %g2)
-	lduw	[%g2 + DC_LINESIZE], %g2
+	lduw	[PCPU(CACHE) + DC_LINESIZE], %g2
 
 1:	stxa	%g0, [%g1] ASI_DCACHE_INVALIDATE
 	membar	#Sync

sys/sparc64/sparc64/mp_machdep.c View File

@@ -301,6 +301,8 @@ cpu_mp_start(void)
 		pc->pc_mid = mid;
 		pc->pc_node = child;
 
+		cache_init(pc);
+
 		all_cpus |= 1 << cpuid;
 		intr_add_cpu(cpuid);
 	}

sys/sparc64/sparc64/spitfire.c View File

@@ -72,9 +72,11 @@ spitfire_cache_flush(void)
 {
 	u_long addr;
 
-	for (addr = 0; addr < cache.dc_size; addr += cache.dc_linesize)
+	for (addr = 0; addr < PCPU_GET(cache.dc_size);
+	    addr += PCPU_GET(cache.dc_linesize))
 		stxa_sync(addr, ASI_DCACHE_TAG, 0);
-	for (addr = 0; addr < cache.ic_size; addr += cache.ic_linesize)
+	for (addr = 0; addr < PCPU_GET(cache.ic_size);
+	    addr += PCPU_GET(cache.ic_linesize))
 		stxa_sync(addr, ASI_ICACHE_TAG, 0);
 }
@@ -93,7 +95,8 @@ spitfire_dcache_page_inval(vm_paddr_t pa)
 	PMAP_STATS_INC(spitfire_dcache_npage_inval);
 	target = pa >> (PAGE_SHIFT - DC_TAG_SHIFT);
 	cookie = ipi_dcache_page_inval(tl_ipi_spitfire_dcache_page_inval, pa);
-	for (addr = 0; addr < cache.dc_size; addr += cache.dc_linesize) {
+	for (addr = 0; addr < PCPU_GET(cache.dc_size);
+	    addr += PCPU_GET(cache.dc_linesize)) {
 		tag = ldxa(addr, ASI_DCACHE_TAG);
 		if (((tag >> DC_VALID_SHIFT) & DC_VALID_MASK) == 0)
 			continue;
@@ -121,7 +124,8 @@ spitfire_icache_page_inval(vm_paddr_t pa)
 	PMAP_STATS_INC(spitfire_icache_npage_inval);
 	target = pa >> (PAGE_SHIFT - IC_TAG_SHIFT);
 	cookie = ipi_icache_page_inval(tl_ipi_spitfire_icache_page_inval, pa);
-	for (addr = 0; addr < cache.ic_size; addr += cache.ic_linesize) {
+	for (addr = 0; addr < PCPU_GET(cache.ic_size);
+	    addr += PCPU_GET(cache.ic_linesize)) {
 		__asm __volatile("ldda [%1] %2, %%g0" /*, %g1 */
 		    : "=r" (tag) : "r" (addr), "n" (ASI_ICACHE_TAG));
 		if (((tag >> IC_VALID_SHIFT) & IC_VALID_MASK) == 0)