Properly initialize Armada XP MP subsystem.

- correct setting of Auxiliary Control Register for MP mode
- correct setting of Auxiliary Debug registers
- clean up management of the memory that contains bootup code
- early initialization of Coherency Fabric (MP and not-MP mode)
- enable Snoop Filtering

Obtained from:	Semihalf
This commit is contained in:
Grzegorz Bernacki 2013-05-06 14:12:36 +00:00
parent 3a1f2172c0
commit 5c39c3ffa2
6 changed files with 177 additions and 89 deletions

View File

@ -37,6 +37,10 @@ __FBSDID("$FreeBSD$");
.Lpj4b_cache_line_size: .Lpj4b_cache_line_size:
.word _C_LABEL(arm_pdcache_line_size) .word _C_LABEL(arm_pdcache_line_size)
.Lpj4b_sf_ctrl_reg:
.word 0xf1021820
ENTRY(pj4b_setttb) ENTRY(pj4b_setttb)
/* Cache synchronization is not required as this core has PIPT caches */ /* Cache synchronization is not required as this core has PIPT caches */
mcr p15, 0, r1, c7, c10, 4 /* drain the write buffer */ mcr p15, 0, r1, c7, c10, 4 /* drain the write buffer */
@ -198,13 +202,42 @@ ENTRY(get_core_id)
END(get_core_id) END(get_core_id)
ENTRY(pj4b_config) ENTRY(pj4b_config)
/* Set Auxiliary Debug Modes Control 0 register */
mrc p15, 1, r0, c15, c1, 0
/* ARMADAXP errata fix: ARM-CPU-6136 */
bic r0, r0, #(1 << 12) /* LDSTM first issue is single word */
orr r0, r0, #(1 << 22) /* DVM_WAKEUP disable */
mcr p15, 1, r0, c15, c1, 0
/* Set Auxiliary Debug Modes Control 1 register */
mrc p15, 1, r0, c15, c1, 1
/* ARMADAXP errata fix: ARM-CPU-6409 */
bic r0, r0, #(1 << 2) /* Disable static branch prediction */
orr r0, r0, #(1 << 5) /* STREX backoff disable */
orr r0, r0, #(1 << 8) /* Internal parity handling disable */
orr r0, r0, #(1 << 16) /* Disable data transfer for clean line */
mcr p15, 1, r0, c15, c1, 1
/* Set Auxiliary Function Modes Control 0 register */
mrc p15, 1, r0, c15, c2, 0
#if defined(SMP)
orr r0, r0, #(1 << 1) /* SMP/nAMP enabled (coherency) */
#endif
orr r0, r0, #(1 << 2) /* L1 parity enable */
orr r0, r0, #(1 << 8) /* Cache and TLB maintenance broadcast enable */
mcr p15, 1, r0, c15, c2, 0
/* Set Auxiliary Debug Modes Control 2 register */ /* Set Auxiliary Debug Modes Control 2 register */
mrc p15, 1, r0, c15, c1, 2 mrc p15, 1, r0, c15, c1, 2
bic r0, r0, #(1 << 23) bic r0, r0, #(1 << 23) /* Enable fast LDR */
orr r0, r0, #(1 << 25) orr r0, r0, #(1 << 25) /* Intervention Interleave disable */
orr r0, r0, #(1 << 27) orr r0, r0, #(1 << 27) /* Critical word first sequencing disable */
orr r0, r0, #(1 << 29) orr r0, r0, #(1 << 29) /* Disable MO device read / write */
orr r0, r0, #(1 << 30) orr r0, r0, #(1 << 30) /* L1 cache strict round-robin replacement policy*/
orr r0, r0, #(1 << 31) /* Enable write evict */
mcr p15, 1, r0, c15, c1, 2 mcr p15, 1, r0, c15, c1, 2
#if defined(SMP) #if defined(SMP)
/* Set SMP mode in Auxiliary Control Register */ /* Set SMP mode in Auxiliary Control Register */
@ -212,6 +245,18 @@ ENTRY(pj4b_config)
orr r0, r0, #(1 << 5) orr r0, r0, #(1 << 5)
mcr p15, 0, r0, c1, c0, 1 mcr p15, 0, r0, c1, c0, 1
#endif #endif
/* Load CPU number */
mrc p15, 0, r0, c0, c0, 5
and r0, r0, #0xf
/* SF Enable and invalidate */
ldr r1, .Lpj4b_sf_ctrl_reg
ldr r2, [r1, r0, lsl #8]
orr r2, r2, #(1 << 0)
bic r2, r2, #(1 << 8)
str r2, [r1, r0, lsl #8]
RET RET
END(pj4b_config) END(pj4b_config)

View File

@ -265,7 +265,11 @@ mmu_init_table:
/* map VA 0xc0000000..0xc3ffffff to PA */ /* map VA 0xc0000000..0xc3ffffff to PA */
MMU_INIT(KERNBASE, PHYSADDR, 64, L1_TYPE_S|L1_SHARED|L1_S_C|L1_S_AP(AP_KRW)) MMU_INIT(KERNBASE, PHYSADDR, 64, L1_TYPE_S|L1_SHARED|L1_S_C|L1_S_AP(AP_KRW))
MMU_INIT(0x48000000, 0x48000000, 1, L1_TYPE_S|L1_SHARED|L1_S_C|L1_S_AP(AP_KRW)) MMU_INIT(0x48000000, 0x48000000, 1, L1_TYPE_S|L1_SHARED|L1_S_C|L1_S_AP(AP_KRW))
#endif #if defined(CPU_MV_PJ4B)
/* map VA 0xf1000000..0xf10fffff to PA 0xd0000000 */
MMU_INIT(0xf1000000, 0xd0000000, 1, L1_TYPE_S|L1_SHARED|L1_S_B|L1_S_AP(AP_KRW))
#endif /* CPU_MV_PJ4B */
#endif /* SMP */
.word 0 /* end of table */ .word 0 /* end of table */
#endif #endif
.Lstart: .Lstart:

View File

@ -127,6 +127,13 @@ cpu_mp_start(void)
KERNPHYSADDR + KERNVIRTADDR) >> L1_S_SHIFT] = KERNPHYSADDR + KERNVIRTADDR) >> L1_S_SHIFT] =
L1_TYPE_S|L1_SHARED|L1_S_C|L1_S_AP(AP_KRW)|L1_S_DOM(PMAP_DOMAIN_KERNEL)|addr; L1_TYPE_S|L1_SHARED|L1_S_C|L1_S_AP(AP_KRW)|L1_S_DOM(PMAP_DOMAIN_KERNEL)|addr;
} }
#if defined(CPU_MV_PJ4B)
/* Add ARMADAXP registers required for snoop filter initialization */
((int *)(temp_pagetable_va))[0xf1000000 >> L1_S_SHIFT] =
L1_TYPE_S|L1_SHARED|L1_S_B|L1_S_AP(AP_KRW)|0xd0000000;
#endif
temp_pagetable = (void*)(vtophys(temp_pagetable_va)); temp_pagetable = (void*)(vtophys(temp_pagetable_va));
cpu_idcache_wbinv_all(); cpu_idcache_wbinv_all();
cpu_l2cache_wbinv_all(); cpu_l2cache_wbinv_all();

View File

@ -51,6 +51,9 @@ __FBSDID("$FreeBSD$");
(0x0F & (sar >> 24))) (0x0F & (sar >> 24)))
static uint32_t count_l2clk(void); static uint32_t count_l2clk(void);
void armadaxp_l2_init(void);
void armadaxp_init_coher_fabric(void);
int platform_get_ncpus(void);
#define ARMADAXP_L2_BASE (MV_BASE + 0x8000) #define ARMADAXP_L2_BASE (MV_BASE + 0x8000)
#define ARMADAXP_L2_CTRL 0x100 #define ARMADAXP_L2_CTRL 0x100
@ -77,7 +80,11 @@ static uint32_t count_l2clk(void);
#define ARMADAXP_L2_FLUSH_PHYS 0x7F0 #define ARMADAXP_L2_FLUSH_PHYS 0x7F0
#define ARMADAXP_L2_FLUSH_WAY 0x7FC #define ARMADAXP_L2_FLUSH_WAY 0x7FC
#define COHER_FABRIC_CFU 0x228 #define MV_COHERENCY_FABRIC_BASE (MV_MBUS_BRIDGE_BASE + 0x200)
#define COHER_FABRIC_CTRL 0x00
#define COHER_FABRIC_CONF 0x04
#define COHER_FABRIC_CFU 0x28
#define COHER_FABRIC_CIB_CTRL 0x80
/* XXX Make gpio driver optional and remove it */ /* XXX Make gpio driver optional and remove it */
struct resource_spec mv_gpio_res[] = { struct resource_spec mv_gpio_res[] = {
@ -188,8 +195,46 @@ get_l2clk(void)
return (l2clk_freq); return (l2clk_freq);
} }
void armadaxp_l2_init(void); static uint32_t
void armadaxp_l2_idcache_inv_all(void); read_coher_fabric(uint32_t reg)
{
return (bus_space_read_4(fdtbus_bs_tag, MV_COHERENCY_FABRIC_BASE, reg));
}
/*
 * Write the uint32_t value 'val' to the register located at offset 'reg'
 * from MV_COHERENCY_FABRIC_BASE, through the fdtbus_bs_tag bus space.
 */
static void
write_coher_fabric(uint32_t reg, uint32_t val)
{
bus_space_write_4(fdtbus_bs_tag, MV_COHERENCY_FABRIC_BASE, reg, val);
}
/*
 * Return the number of CPUs to use.
 *
 * Kernels built without SMP always report a single CPU.  SMP kernels
 * derive the count from the low four bits of the COHER_FABRIC_CONF
 * register, plus one.
 */
int
platform_get_ncpus(void)
{
	int ncpus;

#if defined(SMP)
	/* Low nibble of the fabric configuration register, plus one. */
	ncpus = (read_coher_fabric(COHER_FABRIC_CONF) & 0xf) + 1;
#else
	ncpus = 1;
#endif
	return (ncpus);
}
/*
 * Program the coherency fabric for all CPUs reported by
 * platform_get_ncpus().
 *
 * A mask with one bit per CPU is shifted to start at bit 24 and OR-ed into
 * both COHER_FABRIC_CTRL and COHER_FABRIC_CONF; COHER_FABRIC_CONF
 * additionally gets bit 15 set.  Each register is updated with a
 * read-modify-write, the control register first.
 */
void
armadaxp_init_coher_fabric(void)
{
	uint32_t ncpu, cpu_bits, ctrl, conf;

	ncpu = platform_get_ncpus();

	/* Build the per-CPU mask as an unsigned value before shifting it. */
	cpu_bits = (1 << ncpu) - 1;
	cpu_bits <<= 24;

	ctrl = read_coher_fabric(COHER_FABRIC_CTRL);
	ctrl |= cpu_bits;
	write_coher_fabric(COHER_FABRIC_CTRL, ctrl);

	conf = read_coher_fabric(COHER_FABRIC_CONF);
	conf |= cpu_bits | (1 << 15);
	write_coher_fabric(COHER_FABRIC_CONF, conf);
}
#define ALL_WAYS 0xffffffff #define ALL_WAYS 0xffffffff
@ -208,7 +253,7 @@ write_l2_cache(uint32_t reg, uint32_t val)
bus_space_write_4(fdtbus_bs_tag, ARMADAXP_L2_BASE, reg, val); bus_space_write_4(fdtbus_bs_tag, ARMADAXP_L2_BASE, reg, val);
} }
void static void
armadaxp_l2_idcache_inv_all(void) armadaxp_l2_idcache_inv_all(void)
{ {
write_l2_cache(ARMADAXP_L2_INV_WAY, ALL_WAYS); write_l2_cache(ARMADAXP_L2_INV_WAY, ALL_WAYS);
@ -233,11 +278,6 @@ armadaxp_l2_init(void)
/* Clear pending L2 interrupts */ /* Clear pending L2 interrupts */
write_l2_cache(ARMADAXP_L2_INT_CAUSE, 0x1ff); write_l2_cache(ARMADAXP_L2_INT_CAUSE, 0x1ff);
/* Enable Cache and TLB maintenance broadcast */
__asm__ __volatile__ ("mrc p15, 1, %0, c15, c2, 0" : "=r"(reg));
reg |= (1 << 8);
__asm__ __volatile__ ("mcr p15, 1, %0, c15, c2, 0" : :"r"(reg));
/* Enable l2 cache */ /* Enable l2 cache */
reg = read_l2_cache(ARMADAXP_L2_CTRL); reg = read_l2_cache(ARMADAXP_L2_CTRL);
write_l2_cache(ARMADAXP_L2_CTRL, reg | L2_ENABLE); write_l2_cache(ARMADAXP_L2_CTRL, reg | L2_ENABLE);
@ -254,10 +294,14 @@ armadaxp_l2_init(void)
* Enable Cache maintenance operation propagation in coherency fabric * Enable Cache maintenance operation propagation in coherency fabric
* Change point of coherency and point of unification to DRAM. * Change point of coherency and point of unification to DRAM.
*/ */
reg = bus_space_read_4(fdtbus_bs_tag, MV_MBUS_BRIDGE_BASE, reg = read_coher_fabric(COHER_FABRIC_CFU);
COHER_FABRIC_CFU);
reg |= (1 << 17) | (1 << 18); reg |= (1 << 17) | (1 << 18);
bus_space_write_4(fdtbus_bs_tag, MV_MBUS_BRIDGE_BASE, COHER_FABRIC_CFU, write_coher_fabric(COHER_FABRIC_CFU, reg);
reg);
/* Coherent IO Bridge initialization */
reg = read_coher_fabric(COHER_FABRIC_CIB_CTRL);
reg &= ~(7 << 16);
reg |= (7 << 16);
write_coher_fabric(COHER_FABRIC_CIB_CTRL, reg);
} }

View File

@ -33,21 +33,21 @@
#include <sys/mutex.h> #include <sys/mutex.h>
#include <sys/smp.h> #include <sys/smp.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <machine/smp.h> #include <machine/smp.h>
#include <machine/fdt.h> #include <machine/fdt.h>
#include <machine/armreg.h>
#include <arm/mv/mvwin.h> #include <arm/mv/mvwin.h>
static int platform_get_ncpus(void);
#define MV_AXP_CPU_DIVCLK_BASE (MV_BASE + 0x18700) #define MV_AXP_CPU_DIVCLK_BASE (MV_BASE + 0x18700)
#define CPU_DIVCLK_CTRL0 0x00 #define CPU_DIVCLK_CTRL0 0x00
#define CPU_DIVCLK_CTRL2_RATIO_FULL0 0x08 #define CPU_DIVCLK_CTRL2_RATIO_FULL0 0x08
#define CPU_DIVCLK_CTRL2_RATIO_FULL1 0x0c #define CPU_DIVCLK_CTRL2_RATIO_FULL1 0x0c
#define CPU_DIVCLK_MASK(x) (~(0xff << (8 * (x))))
#define MV_COHERENCY_FABRIC_BASE (MV_MBUS_BRIDGE_BASE + 0x200)
#define COHER_FABRIC_CTRL 0x00
#define COHER_FABRIC_CONF 0x04
#define CPU_PMU(x) (MV_BASE + 0x22100 + (0x100 * (x))) #define CPU_PMU(x) (MV_BASE + 0x22100 + (0x100 * (x)))
#define CPU_PMU_BOOT 0x24 #define CPU_PMU_BOOT 0x24
@ -57,20 +57,8 @@ static int platform_get_ncpus(void);
#define CPU_RESUME_CONTROL (0x20988) #define CPU_RESUME_CONTROL (0x20988)
/* Coherency Fabric registers */ void armadaxp_init_coher_fabric(void);
static uint32_t int platform_get_ncpus(void);
read_coher_fabric(uint32_t reg)
{
return (bus_space_read_4(fdtbus_bs_tag, MV_COHERENCY_FABRIC_BASE, reg));
}
static void
write_coher_fabric(uint32_t reg, uint32_t val)
{
bus_space_write_4(fdtbus_bs_tag, MV_COHERENCY_FABRIC_BASE, reg, val);
}
/* Coherency Fabric registers */ /* Coherency Fabric registers */
static uint32_t static uint32_t
@ -111,56 +99,58 @@ platform_mp_init_secondary(void)
void mpentry(void); void mpentry(void);
void mptramp(void); void mptramp(void);
static void
initialize_coherency_fabric(void)
{
uint32_t val, cpus, mask;
cpus = platform_get_ncpus();
mask = (1 << cpus) - 1;
val = read_coher_fabric(COHER_FABRIC_CTRL);
val |= (mask << 24);
write_coher_fabric(COHER_FABRIC_CTRL, val);
val = read_coher_fabric(COHER_FABRIC_CONF);
val |= (mask << 24);
write_coher_fabric(COHER_FABRIC_CONF, val);
}
void void
platform_mp_start_ap(void) platform_mp_start_ap(void)
{ {
uint32_t reg, *ptr, cpu_num; uint32_t reg, *src, *dst, cpu_num, div_val, cputype;
vm_offset_t smp_boot;
/*
* Initialization procedure depends on core revision,
* in this step CHIP ID is checked to choose proper procedure
*/
cputype = cpufunc_id();
cputype &= CPU_ID_CPU_MASK;
/* Copy boot code to SRAM */ smp_boot = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
*((unsigned int*)(0xf1020240)) = 0xffff0101; pmap_kenter_nocache(smp_boot, 0xffff0000);
*((unsigned int*)(0xf1008500)) = 0xffff0003; dst = (uint32_t *) smp_boot;
pmap_kenter_nocache(0x880f0000, 0xffff0000); for (src = (uint32_t *)mptramp; src < (uint32_t *)mpentry;
reg = 0x880f0000; src++, dst++) {
*dst = *src;
for (ptr = (uint32_t *)mptramp; ptr < (uint32_t *)mpentry;
ptr++, reg += 4)
*((uint32_t *)reg) = *ptr;
if (mp_ncpus > 1) {
reg = read_cpu_clkdiv(CPU_DIVCLK_CTRL2_RATIO_FULL0);
reg &= 0x00ffffff;
reg |= 0x01000000;
write_cpu_clkdiv(CPU_DIVCLK_CTRL2_RATIO_FULL0, reg);
} }
if (mp_ncpus > 2) { kmem_free(kernel_map, smp_boot, PAGE_SIZE);
reg = read_cpu_clkdiv(CPU_DIVCLK_CTRL2_RATIO_FULL1);
reg &= 0xff00ffff; if (cputype == CPU_ID_MV88SV584X_V7) {
reg |= 0x00010000; /* Core rev A0 */
write_cpu_clkdiv(CPU_DIVCLK_CTRL2_RATIO_FULL1, reg); div_val = read_cpu_clkdiv(CPU_DIVCLK_CTRL2_RATIO_FULL1);
} div_val &= 0x3f;
if (mp_ncpus > 3) {
reg = read_cpu_clkdiv(CPU_DIVCLK_CTRL2_RATIO_FULL1); for (cpu_num = 1; cpu_num < mp_ncpus; cpu_num++ ) {
reg &= 0x00ffffff; reg = read_cpu_clkdiv(CPU_DIVCLK_CTRL2_RATIO_FULL1);
reg |= 0x01000000; reg &= CPU_DIVCLK_MASK(cpu_num);
write_cpu_clkdiv(CPU_DIVCLK_CTRL2_RATIO_FULL1, reg); reg |= div_val << (cpu_num * 8);
write_cpu_clkdiv(CPU_DIVCLK_CTRL2_RATIO_FULL1, reg);
}
} else {
/* Core rev Z1 */
div_val = 0x01;
if (mp_ncpus > 1) {
reg = read_cpu_clkdiv(CPU_DIVCLK_CTRL2_RATIO_FULL0);
reg &= CPU_DIVCLK_MASK(3);
reg |= div_val << 24;
write_cpu_clkdiv(CPU_DIVCLK_CTRL2_RATIO_FULL0, reg);
}
for (cpu_num = 2; cpu_num < mp_ncpus; cpu_num++ ) {
reg = read_cpu_clkdiv(CPU_DIVCLK_CTRL2_RATIO_FULL1);
reg &= CPU_DIVCLK_MASK(cpu_num);
reg |= div_val << (cpu_num * 8);
write_cpu_clkdiv(CPU_DIVCLK_CTRL2_RATIO_FULL1, reg);
}
} }
reg = read_cpu_clkdiv(CPU_DIVCLK_CTRL0); reg = read_cpu_clkdiv(CPU_DIVCLK_CTRL0);
@ -190,14 +180,7 @@ platform_mp_start_ap(void)
wmb(); wmb();
DELAY(10); DELAY(10);
initialize_coherency_fabric(); armadaxp_init_coher_fabric();
}
static int
platform_get_ncpus(void)
{
return ((read_coher_fabric(COHER_FABRIC_CONF) & 0xf) + 1);
} }
void void

View File

@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$");
static int platform_mpp_init(void); static int platform_mpp_init(void);
#if defined(SOC_MV_ARMADAXP) #if defined(SOC_MV_ARMADAXP)
void armadaxp_init_coher_fabric(void);
void armadaxp_l2_init(void); void armadaxp_l2_init(void);
#endif #endif
@ -237,6 +238,10 @@ initarm_late_init(void)
write_cpu_ctrl(CPU_TIMERS_BASE + CPU_TIMER_CONTROL, 0); write_cpu_ctrl(CPU_TIMERS_BASE + CPU_TIMER_CONTROL, 0);
#endif #endif
#if defined(SOC_MV_ARMADAXP) #if defined(SOC_MV_ARMADAXP)
#if !defined(SMP)
/* For SMP case it should be initialized after APs are booted */
armadaxp_init_coher_fabric();
#endif
armadaxp_l2_init(); armadaxp_l2_init();
#endif #endif
} }