On UltraSPARC-III+ and greater, take advantage of ASI_ATOMIC_QUAD_LDD_PHYS,
which takes a physical address instead of a virtual one, for loading TTEs
of the kernel TSB. This means we no longer need to lock the kernel TSB into
the dTLB, which has only a very limited number of lockable slots. The net
result is that we can now handle a kernel TSB of essentially any size and
no longer need to limit the kernel address space based on the number of
dTLB slots available for locked entries. Consequently, other parts of the
trap handlers now also access the kernel TSB only via its physical address
in order to avoid nested traps, as does the PMAP bootstrap code, since we
haven't taken over the trap table at that point yet. Apart from that, the
kernel TSB is now accessed via a direct mapping whenever we are otherwise
taking advantage of ASI_ATOMIC_QUAD_LDD_PHYS, so no further code changes
are needed. Most of this is implemented by extending the trap-table
patching to cover the TSB addresses and mask as well as the ASIs used to
load it, so the runtime overhead of this change is rather low. Currently
the use of ASI_ATOMIC_QUAD_LDD_PHYS is not yet enabled on SPARC64 CPUs due
to lack of testing and because it might require minor adjustments there.

Theoretically it should be possible to use the same approach for the user
TSB, which is already not locked into the dTLB, and thus avoid nested traps
there as well. However, for reasons I don't understand yet, OpenSolaris
only does that with SPARC64 CPUs. On the other hand, addressing the user
TSB physically and thereby avoiding nested traps would also get us closer
to sharing this code with sun4v, which only supports trap levels 0 and 1,
so eventually we could have a single kernel which runs on both sun4u and
sun4v (as Linux and OpenBSD do).

Developed at and committed from:	27C3
marius 2010-12-29 16:59:33 +00:00
parent 074b42904f
commit 10c0dabcb4
7 changed files with 295 additions and 113 deletions
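
As background for the trap-table patching described above: a SPARC sethi/or
pair materializes a 32-bit constant from a 22-bit high part and a 10-bit low
part, which is why the PATCH_TSB macro further below ORs the value shifted
right by 10 into the sethi word and the low 10 bits into the or word, then
flushes the patched instructions. The following stand-alone C sketch
(hypothetical helper names, not part of this commit) merely illustrates that
split:

#include <assert.h>
#include <stdint.h>

/*
 * Hypothetical illustration of the %hi/%lo split exploited by PATCH_TSB:
 * sethi supplies bits 31..10 via its 22-bit immediate and or supplies the
 * remaining low 10 bits via its 13-bit immediate.
 */
static void
patch_sethi_or(uint32_t *sethi_word, uint32_t *or_word, uint32_t val)
{

	*sethi_word |= (val >> 10) & 0x3fffff;
	*or_word |= val & 0x3ff;
}

int
main(void)
{
	uint32_t sethi_word = 0;
	uint32_t or_word = 0;

	patch_sethi_or(&sethi_word, &or_word, 0x12345678);
	/* Reassembling the two immediates yields the original constant. */
	assert((((sethi_word & 0x3fffff) << 10) | (or_word & 0x3ff)) ==
	    0x12345678);
	return (0);
}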


@ -61,18 +61,18 @@ struct pmap {
struct mtx pm_mtx;
struct tte *pm_tsb;
vm_object_t pm_tsb_obj;
cpumask_t pm_active;
uint32_t pm_gen_count; /* generation count (pmap lock dropped) */
u_int pm_retries;
u_int pm_context[MAXCPU];
struct pmap_statistics pm_stats;
};
#define PMAP_LOCK(pmap) mtx_lock(&(pmap)->pm_mtx)
#define PMAP_LOCK_ASSERT(pmap, type) \
mtx_assert(&(pmap)->pm_mtx, (type))
#define PMAP_LOCK_DESTROY(pmap) mtx_destroy(&(pmap)->pm_mtx)
#define PMAP_LOCK_INIT(pmap) mtx_init(&(pmap)->pm_mtx, "pmap", \
NULL, MTX_DEF | MTX_DUPOK)
#define PMAP_LOCKED(pmap) mtx_owned(&(pmap)->pm_mtx)
#define PMAP_MTX(pmap) (&(pmap)->pm_mtx)
@ -99,6 +99,7 @@ int pmap_protect_tte(struct pmap *pm1, struct pmap *pm2, struct tte *tp,
vm_offset_t va);
void pmap_map_tsb(void);
void pmap_set_kctx(void);
#define vtophys(va) pmap_kextract((vm_offset_t)(va))
@ -114,7 +115,7 @@ SYSCTL_DECL(_debug_pmap_stats);
#define PMAP_STATS_VAR(name) \
static long name; \
SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, name, CTLFLAG_RW, \
&name, 0, "")
#define PMAP_STATS_INC(var) \


@ -50,6 +50,7 @@ extern struct tte *tsb_kernel;
extern vm_size_t tsb_kernel_mask;
extern vm_size_t tsb_kernel_size;
extern vm_paddr_t tsb_kernel_phys;
extern u_int tsb_kernel_ldd_phys;
static __inline struct tte *
tsb_vpntobucket(pmap_t pm, vm_offset_t vpn)


@ -75,8 +75,12 @@ __FBSDID("$FreeBSD$");
#include "assym.s"
#define TSB_ASI 0x0
#define TSB_KERNEL 0x0
#define TSB_KERNEL_MASK 0x0
#define TSB_KERNEL_PHYS 0x0
#define TSB_KERNEL_PHYS_END 0x0
#define TSB_QUAD_LDD 0x0
.register %g2,#ignore
.register %g3,#ignore
@ -84,19 +88,19 @@ __FBSDID("$FreeBSD$");
.register %g7,#ignore
/*
* Atomically set the reference bit in a TTE.
* Atomically set a bit in a TTE.
*/
#define TTE_SET_BIT(r1, r2, r3, bit) \
#define TTE_SET_BIT(r1, r2, r3, bit, a, asi) \
add r1, TTE_DATA, r1 ; \
ldx [r1], r2 ; \
LD(x, a) [r1] asi, r2 ; \
9: or r2, bit, r3 ; \
casxa [r1] ASI_N, r2, r3 ; \
CAS(x, a) [r1] asi, r2, r3 ; \
cmp r2, r3 ; \
bne,pn %xcc, 9b ; \
mov r3, r2
#define TTE_SET_REF(r1, r2, r3) TTE_SET_BIT(r1, r2, r3, TD_REF)
#define TTE_SET_W(r1, r2, r3) TTE_SET_BIT(r1, r2, r3, TD_W)
#define TTE_SET_REF(r1, r2, r3, a, asi) TTE_SET_BIT(r1, r2, r3, TD_REF, a, asi)
#define TTE_SET_W(r1, r2, r3, a, asi) TTE_SET_BIT(r1, r2, r3, TD_W, a, asi)
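
For reference, the retry loop implemented by TTE_SET_BIT is a plain
compare-and-swap loop; the change above only parameterizes the load/CAS
access and the ASI so the same macro works on either a virtual or a physical
TSB address. A rough C analogue (illustrative only, using compiler atomics
instead of casxa with an explicit ASI) could look like this:

#include <stdint.h>

/*
 * Illustrative C analogue of TTE_SET_BIT: atomically OR a bit into the
 * TTE data word and return the value observed before the update, so the
 * caller can re-check TD_V afterwards.
 */
static uint64_t
tte_set_bit(volatile uint64_t *tte_data, uint64_t bit)
{
	uint64_t expected, desired;

	expected = *tte_data;
	do {
		desired = expected | bit;
		/* On failure, expected is reloaded with the current value. */
	} while (!__atomic_compare_exchange_n(tte_data, &expected, desired, 0,
	    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
	return (expected);
}
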
/*
* Macros for spilling and filling live windows.
@ -691,7 +695,7 @@ ENTRY(tl0_immu_miss_set_ref)
/*
* Set the reference bit.
*/
TTE_SET_REF(%g4, %g2, %g3)
TTE_SET_REF(%g4, %g2, %g3, a, ASI_N)
/*
* May have become invalid during casxa, in which case start over.
@ -849,7 +853,7 @@ ENTRY(tl0_dmmu_miss_set_ref)
/*
* Set the reference bit.
*/
TTE_SET_REF(%g4, %g2, %g3)
TTE_SET_REF(%g4, %g2, %g3, a, ASI_N)
/*
* May have become invalid during casxa, in which case start over.
@ -997,7 +1001,7 @@ tl1_dmmu_prot_user:
/*
* Set the hardware write bit.
*/
TTE_SET_W(%g4, %g2, %g3)
TTE_SET_W(%g4, %g2, %g3, a, ASI_N)
/*
* Delete the old TLB entry and clear the SFSR.
@ -1327,11 +1331,14 @@ END(tl1_sfsr_trap)
* Compute the address of the TTE. The TSB mask and address of the
* TSB are patched at startup.
*/
.globl tl1_immu_miss_patch_1
tl1_immu_miss_patch_1:
.globl tl1_immu_miss_patch_tsb_mask_1
tl1_immu_miss_patch_tsb_mask_1:
sethi %hi(TSB_KERNEL_MASK), %g6
or %g6, %lo(TSB_KERNEL_MASK), %g6
.globl tl1_immu_miss_patch_tsb_1
tl1_immu_miss_patch_tsb_1:
sethi %hi(TSB_KERNEL), %g7
or %g7, %lo(TSB_KERNEL), %g7
srlx %g5, TAR_VPN_SHIFT, %g5
and %g5, %g6, %g6
@ -1341,7 +1348,9 @@ tl1_immu_miss_patch_1:
/*
* Load the TTE.
*/
ldda [%g6] ASI_NUCLEUS_QUAD_LDD, %g6 /*, %g7 */
.globl tl1_immu_miss_patch_quad_ldd_1
tl1_immu_miss_patch_quad_ldd_1:
ldda [%g6] TSB_QUAD_LDD, %g6 /*, %g7 */
/*
* Check that it's valid and executable and that the virtual page
@ -1375,11 +1384,14 @@ ENTRY(tl1_immu_miss_set_ref)
* Recompute the TTE address, which we clobbered loading the TTE.
* The TSB mask and address of the TSB are patched at startup.
*/
.globl tl1_immu_miss_patch_2
tl1_immu_miss_patch_2:
.globl tl1_immu_miss_patch_tsb_mask_2
tl1_immu_miss_patch_tsb_mask_2:
sethi %hi(TSB_KERNEL_MASK), %g6
or %g6, %lo(TSB_KERNEL_MASK), %g6
.globl tl1_immu_miss_patch_tsb_2
tl1_immu_miss_patch_tsb_2:
sethi %hi(TSB_KERNEL), %g7
or %g7, %lo(TSB_KERNEL), %g7
and %g5, %g6, %g5
sllx %g5, TTE_SHIFT, %g5
@ -1388,7 +1400,10 @@ tl1_immu_miss_patch_2:
/*
* Set the reference bit.
*/
TTE_SET_REF(%g5, %g6, %g7)
.globl tl1_immu_miss_patch_asi_1
tl1_immu_miss_patch_asi_1:
wr %g0, TSB_ASI, %asi
TTE_SET_REF(%g5, %g6, %g7, a, %asi)
/*
* May have become invalid during casxa, in which case start over.
@ -1447,11 +1462,14 @@ END(tl1_immu_miss_trap)
* Compute the address of the TTE. The TSB mask and address of the
* TSB are patched at startup.
*/
.globl tl1_dmmu_miss_patch_1
tl1_dmmu_miss_patch_1:
.globl tl1_dmmu_miss_patch_tsb_mask_1
tl1_dmmu_miss_patch_tsb_mask_1:
sethi %hi(TSB_KERNEL_MASK), %g6
or %g6, %lo(TSB_KERNEL_MASK), %g6
.globl tl1_dmmu_miss_patch_tsb_1
tl1_dmmu_miss_patch_tsb_1:
sethi %hi(TSB_KERNEL), %g7
or %g7, %lo(TSB_KERNEL), %g7
srlx %g5, TAR_VPN_SHIFT, %g5
and %g5, %g6, %g6
@ -1461,7 +1479,9 @@ tl1_dmmu_miss_patch_1:
/*
* Load the TTE.
*/
ldda [%g6] ASI_NUCLEUS_QUAD_LDD, %g6 /*, %g7 */
.globl tl1_dmmu_miss_patch_quad_ldd_1
tl1_dmmu_miss_patch_quad_ldd_1:
ldda [%g6] TSB_QUAD_LDD, %g6 /*, %g7 */
/*
* Check that it's valid and that the virtual page numbers match.
@ -1492,11 +1512,14 @@ ENTRY(tl1_dmmu_miss_set_ref)
* Recompute the TTE address, which we clobbered loading the TTE.
* The TSB mask and address of the TSB are patched at startup.
*/
.globl tl1_dmmu_miss_patch_2
tl1_dmmu_miss_patch_2:
.globl tl1_dmmu_miss_patch_tsb_mask_2
tl1_dmmu_miss_patch_tsb_mask_2:
sethi %hi(TSB_KERNEL_MASK), %g6
or %g6, %lo(TSB_KERNEL_MASK), %g6
.globl tl1_dmmu_miss_patch_tsb_2
tl1_dmmu_miss_patch_tsb_2:
sethi %hi(TSB_KERNEL), %g7
or %g7, %lo(TSB_KERNEL), %g7
and %g5, %g6, %g5
sllx %g5, TTE_SHIFT, %g5
@ -1505,7 +1528,10 @@ tl1_dmmu_miss_patch_2:
/*
* Set the reference bit.
*/
TTE_SET_REF(%g5, %g6, %g7)
.globl tl1_dmmu_miss_patch_asi_1
tl1_dmmu_miss_patch_asi_1:
wr %g0, TSB_ASI, %asi
TTE_SET_REF(%g5, %g6, %g7, a, %asi)
/*
* May have become invalid during casxa, in which case start over.
@ -1545,15 +1571,36 @@ ENTRY(tl1_dmmu_miss_direct)
* correspond to the TTE valid and page size bits are left set, so
* they don't have to be included in the TTE bits below. We know they
* are set because the virtual address is in the upper va hole.
* NB: if we are taking advantage of the ASI_ATOMIC_QUAD_LDD_PHYS
* and we get a miss on the directly accessed kernel TSB we must not
* set TD_CV in order to access it uniformly bypassing the D$.
*/
setx TLB_DIRECT_ADDRESS_MASK, %g7, %g4
and %g5, %g4, %g4
setx TLB_DIRECT_TO_TTE_MASK, %g7, %g6
and %g5, %g6, %g5
or %g5, TD_CP | TD_CV | TD_W, %g5
.globl tl1_dmmu_miss_direct_patch_tsb_phys_1
tl1_dmmu_miss_direct_patch_tsb_phys_1:
sethi %hi(TSB_KERNEL_PHYS), %g7
or %g7, %lo(TSB_KERNEL_PHYS), %g7
cmp %g4, %g7
bl,pt %xcc, 1f
or %g5, TD_CP | TD_W, %g5
.globl tl1_dmmu_miss_direct_patch_tsb_phys_end_1
tl1_dmmu_miss_direct_patch_tsb_phys_end_1:
sethi %hi(TSB_KERNEL_PHYS_END), %g7
or %g7, %lo(TSB_KERNEL_PHYS_END), %g7
cmp %g4, %g7
bg,a,pt %xcc, 1f
nop
ba,pt %xcc, 2f
nop
1: or %g5, TD_CV, %g5
/*
* Load the TTE data into the TLB and retry the instruction.
*/
stxa %g5, [%g0] ASI_DTLB_DATA_IN_REG
2: stxa %g5, [%g0] ASI_DTLB_DATA_IN_REG
retry
END(tl1_dmmu_miss_direct)
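
The new range check in tl1_dmmu_miss_direct above sets TD_CV only when the
faulting physical address lies outside [TSB_KERNEL_PHYS, TSB_KERNEL_PHYS_END],
so the directly mapped kernel TSB is always accessed with the D$ bypassed,
matching the way ASI_ATOMIC_QUAD_LDD_PHYS accesses it. A hedged C rendering
of that decision (the TD_* values below are placeholders, not the real tte.h
definitions):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Placeholder bit values for illustration only; the real definitions
 * live in sys/sparc64/include/tte.h.
 */
#define	TD_CP	(1UL << 2)
#define	TD_CV	(1UL << 1)
#define	TD_W	(1UL << 0)

/*
 * Compute the extra TTE bits for a directly mapped page, leaving TD_CV
 * clear when the page belongs to the kernel TSB so the TSB is never
 * cached in the D$.
 */
static uint64_t
direct_map_bits(uint64_t pa, uint64_t tsb_phys, uint64_t tsb_phys_end)
{
	uint64_t data;

	data = TD_CP | TD_W;
	if (pa < tsb_phys || pa > tsb_phys_end)
		data |= TD_CV;		/* Not part of the TSB. */
	return (data);
}

int
main(void)
{

	printf("inside TSB:  0x%" PRIx64 "\n",
	    direct_map_bits(0x1000, 0x1000, 0x1fff));
	printf("outside TSB: 0x%" PRIx64 "\n",
	    direct_map_bits(0x4000, 0x1000, 0x1fff));
	return (0);
}
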
@ -1584,11 +1631,14 @@ ENTRY(tl1_dmmu_prot_1)
* Compute the address of the TTE. The TSB mask and address of the
* TSB are patched at startup.
*/
.globl tl1_dmmu_prot_patch_1
tl1_dmmu_prot_patch_1:
.globl tl1_dmmu_prot_patch_tsb_mask_1
tl1_dmmu_prot_patch_tsb_mask_1:
sethi %hi(TSB_KERNEL_MASK), %g6
or %g6, %lo(TSB_KERNEL_MASK), %g6
.globl tl1_dmmu_prot_patch_tsb_1
tl1_dmmu_prot_patch_tsb_1:
sethi %hi(TSB_KERNEL), %g7
or %g7, %lo(TSB_KERNEL), %g7
srlx %g5, TAR_VPN_SHIFT, %g5
and %g5, %g6, %g6
@ -1598,7 +1648,9 @@ tl1_dmmu_prot_patch_1:
/*
* Load the TTE.
*/
ldda [%g6] ASI_NUCLEUS_QUAD_LDD, %g6 /*, %g7 */
.globl tl1_dmmu_prot_patch_quad_ldd_1
tl1_dmmu_prot_patch_quad_ldd_1:
ldda [%g6] TSB_QUAD_LDD, %g6 /*, %g7 */
/*
* Check that it's valid and writeable and that the virtual page
@ -1625,11 +1677,14 @@ tl1_dmmu_prot_patch_1:
* Recompute the TTE address, which we clobbered loading the TTE.
* The TSB mask and address of the TSB are patched at startup.
*/
.globl tl1_dmmu_prot_patch_2
tl1_dmmu_prot_patch_2:
.globl tl1_dmmu_prot_patch_tsb_mask_2
tl1_dmmu_prot_patch_tsb_mask_2:
sethi %hi(TSB_KERNEL_MASK), %g6
or %g6, %lo(TSB_KERNEL_MASK), %g6
.globl tl1_dmmu_prot_patch_tsb_2
tl1_dmmu_prot_patch_tsb_2:
sethi %hi(TSB_KERNEL), %g7
or %g7, %lo(TSB_KERNEL), %g7
and %g5, %g6, %g5
sllx %g5, TTE_SHIFT, %g5
@ -1638,7 +1693,10 @@ tl1_dmmu_prot_patch_2:
/*
* Set the hardware write bit.
*/
TTE_SET_W(%g5, %g6, %g7)
.globl tl1_dmmu_prot_patch_asi_1
tl1_dmmu_prot_patch_asi_1:
wr %g0, TSB_ASI, %asi
TTE_SET_W(%g5, %g6, %g7, a, %asi)
/*
* May have become invalid during casxa, in which case start over.


@ -136,6 +136,7 @@ ASSYM(TS_MIN, TS_MIN);
ASSYM(TS_MAX, TS_MAX);
ASSYM(TLB_DAR_SLOT_SHIFT, TLB_DAR_SLOT_SHIFT);
ASSYM(TLB_CXR_PGSZ_MASK, TLB_CXR_PGSZ_MASK);
ASSYM(TLB_DIRECT_ADDRESS_MASK, TLB_DIRECT_ADDRESS_MASK);
ASSYM(TLB_DIRECT_TO_TTE_MASK, TLB_DIRECT_TO_TTE_MASK);
ASSYM(TV_SIZE_BITS, TV_SIZE_BITS);
#endif


@ -89,6 +89,7 @@ __FBSDID("$FreeBSD$");
#include <machine/smp.h>
#include <machine/tick.h>
#include <machine/tlb.h>
#include <machine/tsb.h>
#include <machine/tte.h>
#include <machine/ver.h>
@ -439,8 +440,12 @@ cpu_mp_bootstrap(struct pcpu *pc)
tick_clear(pc->pc_impl);
tick_stop(pc->pc_impl);
/* Lock the kernel TSB in the TLB. */
pmap_map_tsb();
/* Set the kernel context. */
pmap_set_kctx();
/* Lock the kernel TSB in the TLB if necessary. */
if (tsb_kernel_ldd_phys == 0)
pmap_map_tsb();
/*
* Flush all non-locked TLB entries possibly left over by the


@ -156,6 +156,8 @@ struct pmap kernel_pmap_store;
*/
static vm_paddr_t pmap_bootstrap_alloc(vm_size_t size, uint32_t colors);
static void pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data);
/*
* Map the given physical page at the specified virtual address in the
* target pmap with the protection requested. If specified the page
@ -166,12 +168,26 @@ static vm_paddr_t pmap_bootstrap_alloc(vm_size_t size, uint32_t colors);
static void pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m,
vm_prot_t prot, boolean_t wired);
extern int tl1_immu_miss_patch_1[];
extern int tl1_immu_miss_patch_2[];
extern int tl1_dmmu_miss_patch_1[];
extern int tl1_dmmu_miss_patch_2[];
extern int tl1_dmmu_prot_patch_1[];
extern int tl1_dmmu_prot_patch_2[];
extern int tl1_dmmu_miss_direct_patch_tsb_phys_1[];
extern int tl1_dmmu_miss_direct_patch_tsb_phys_end_1[];
extern int tl1_dmmu_miss_patch_asi_1[];
extern int tl1_dmmu_miss_patch_quad_ldd_1[];
extern int tl1_dmmu_miss_patch_tsb_1[];
extern int tl1_dmmu_miss_patch_tsb_2[];
extern int tl1_dmmu_miss_patch_tsb_mask_1[];
extern int tl1_dmmu_miss_patch_tsb_mask_2[];
extern int tl1_dmmu_prot_patch_asi_1[];
extern int tl1_dmmu_prot_patch_quad_ldd_1[];
extern int tl1_dmmu_prot_patch_tsb_1[];
extern int tl1_dmmu_prot_patch_tsb_2[];
extern int tl1_dmmu_prot_patch_tsb_mask_1[];
extern int tl1_dmmu_prot_patch_tsb_mask_2[];
extern int tl1_immu_miss_patch_asi_1[];
extern int tl1_immu_miss_patch_quad_ldd_1[];
extern int tl1_immu_miss_patch_tsb_1[];
extern int tl1_immu_miss_patch_tsb_2[];
extern int tl1_immu_miss_patch_tsb_mask_1[];
extern int tl1_immu_miss_patch_tsb_mask_2[];
/*
* If user pmap is processed with pmap_remove and the
@ -302,13 +318,21 @@ pmap_bootstrap(u_int cpu_impl)
vm_size_t physsz;
vm_size_t virtsz;
u_long data;
u_long vpn;
phandle_t pmem;
phandle_t vmem;
u_int dtlb_slots_avail;
int i;
int j;
int sz;
uint32_t asi;
uint32_t colors;
uint32_t ldd;
/*
* Set the kernel context.
*/
pmap_set_kctx();
colors = dcache_color_ignore != 0 ? 1 : DCACHE_COLORS;
@ -355,40 +379,56 @@ pmap_bootstrap(u_int cpu_impl)
/*
* Calculate the size of kernel virtual memory, and the size and mask
* for the kernel TSB based on the physical memory size but limited
* by the amount of dTLB slots available for locked entries (given
* that for spitfire-class CPUs all of the dt64 slots can hold locked
* entries but there is no large dTLB for unlocked ones, we don't use
* more than half of it for locked entries).
* by the amount of dTLB slots available for locked entries if we have
* to lock the TSB in the TLB (given that for spitfire-class CPUs all
* of the dt64 slots can hold locked entries but there is no large
* dTLB for unlocked ones, we don't use more than half of it for the
* TSB).
* Note that for reasons unknown OpenSolaris doesn't take advantage of
* ASI_ATOMIC_QUAD_LDD_PHYS on UltraSPARC-III. However, given that no
* public documentation is available for these, the latter just might
* not support it, yet.
*/
dtlb_slots_avail = 0;
for (i = 0; i < dtlb_slots; i++) {
data = dtlb_get_data(i);
if ((data & (TD_V | TD_L)) != (TD_V | TD_L))
dtlb_slots_avail++;
}
#ifdef SMP
dtlb_slots_avail -= PCPU_PAGES;
#endif
if (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
cpu_impl < CPU_IMPL_ULTRASPARCIII)
dtlb_slots_avail /= 2;
virtsz = roundup(physsz, PAGE_SIZE_4M << (PAGE_SHIFT - TTE_SHIFT));
virtsz = MIN(virtsz,
(dtlb_slots_avail * PAGE_SIZE_4M) << (PAGE_SHIFT - TTE_SHIFT));
if (cpu_impl >= CPU_IMPL_ULTRASPARCIIIp)
tsb_kernel_ldd_phys = 1;
else {
dtlb_slots_avail = 0;
for (i = 0; i < dtlb_slots; i++) {
data = dtlb_get_data(i);
if ((data & (TD_V | TD_L)) != (TD_V | TD_L))
dtlb_slots_avail++;
}
#ifdef SMP
dtlb_slots_avail -= PCPU_PAGES;
#endif
if (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
cpu_impl < CPU_IMPL_ULTRASPARCIII)
dtlb_slots_avail /= 2;
virtsz = MIN(virtsz, (dtlb_slots_avail * PAGE_SIZE_4M) <<
(PAGE_SHIFT - TTE_SHIFT));
}
vm_max_kernel_address = VM_MIN_KERNEL_ADDRESS + virtsz;
tsb_kernel_size = virtsz >> (PAGE_SHIFT - TTE_SHIFT);
tsb_kernel_mask = (tsb_kernel_size >> TTE_SHIFT) - 1;
/*
* Allocate the kernel TSB and lock it in the TLB.
* Allocate the kernel TSB and lock it in the TLB if necessary.
*/
pa = pmap_bootstrap_alloc(tsb_kernel_size, colors);
if (pa & PAGE_MASK_4M)
panic("pmap_bootstrap: tsb unaligned\n");
panic("pmap_bootstrap: TSB unaligned\n");
tsb_kernel_phys = pa;
tsb_kernel = (struct tte *)(VM_MIN_KERNEL_ADDRESS - tsb_kernel_size);
pmap_map_tsb();
bzero(tsb_kernel, tsb_kernel_size);
if (tsb_kernel_ldd_phys == 0) {
tsb_kernel =
(struct tte *)(VM_MIN_KERNEL_ADDRESS - tsb_kernel_size);
pmap_map_tsb();
bzero(tsb_kernel, tsb_kernel_size);
} else {
tsb_kernel =
(struct tte *)TLB_PHYS_TO_DIRECT(tsb_kernel_phys);
aszero(ASI_PHYS_USE_EC, tsb_kernel_phys, tsb_kernel_size);
}
/*
* Allocate and map the dynamic per-CPU area for the BSP.
@ -403,35 +443,84 @@ pmap_bootstrap(u_int cpu_impl)
msgbufp = (struct msgbuf *)TLB_PHYS_TO_DIRECT(pa);
/*
* Patch the virtual address and the tsb mask into the trap table.
* Patch the TSB addresses and mask as well as the ASIs used to load
* it into the trap table.
*/
#define LDDA_R_I_R(rd, imm_asi, rs1, rs2) \
(EIF_OP(IOP_LDST) | EIF_F3_RD(rd) | EIF_F3_OP3(INS3_LDDA) | \
EIF_F3_RS1(rs1) | EIF_F3_I(0) | EIF_F3_IMM_ASI(imm_asi) | \
EIF_F3_RS2(rs2))
#define OR_R_I_R(rd, imm13, rs1) \
(EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_OR) | \
EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
#define SETHI(rd, imm22) \
(EIF_OP(IOP_FORM2) | EIF_F2_RD(rd) | EIF_F2_OP2(INS0_SETHI) | \
EIF_IMM((imm22) >> 10, 22))
#define WR_R_I(rd, imm13, rs1) \
(EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_WR) | \
EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
#define PATCH(addr) do { \
if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) || \
addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0, IF_F3_RS1(addr[1])) || \
addr[2] != SETHI(IF_F2_RD(addr[2]), 0x0)) \
panic("pmap_boostrap: patched instructions have changed"); \
addr[0] |= EIF_IMM((tsb_kernel_mask) >> 10, 22); \
addr[1] |= EIF_IMM(tsb_kernel_mask, 10); \
addr[2] |= EIF_IMM(((vm_offset_t)tsb_kernel) >> 10, 22); \
flush(addr); \
flush(addr + 1); \
flush(addr + 2); \
#define PATCH_ASI(addr, asi) do { \
if (addr[0] != WR_R_I(IF_F3_RD(addr[0]), 0x0, \
IF_F3_RS1(addr[0]))) \
panic("%s: patched instructions have changed", \
__func__); \
addr[0] |= EIF_IMM((asi), 13); \
flush(addr); \
} while (0)
PATCH(tl1_immu_miss_patch_1);
PATCH(tl1_immu_miss_patch_2);
PATCH(tl1_dmmu_miss_patch_1);
PATCH(tl1_dmmu_miss_patch_2);
PATCH(tl1_dmmu_prot_patch_1);
PATCH(tl1_dmmu_prot_patch_2);
#define PATCH_LDD(addr, asi) do { \
if (addr[0] != LDDA_R_I_R(IF_F3_RD(addr[0]), 0x0, \
IF_F3_RS1(addr[0]), IF_F3_RS2(addr[0]))) \
panic("%s: patched instructions have changed", \
__func__); \
addr[0] |= EIF_F3_IMM_ASI(asi); \
flush(addr); \
} while (0)
#define PATCH_TSB(addr, val) do { \
if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) || \
addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0, \
IF_F3_RS1(addr[1]))) \
panic("%s: patched instructions have changed", \
__func__); \
addr[0] |= EIF_IMM((val) >> 10, 22); \
addr[1] |= EIF_IMM((val), 10); \
flush(addr); \
flush(addr + 1); \
} while (0)
if (tsb_kernel_ldd_phys == 0) {
asi = ASI_N;
ldd = ASI_NUCLEUS_QUAD_LDD;
off = (vm_offset_t)tsb_kernel;
} else {
asi = ASI_PHYS_USE_EC;
ldd = ASI_ATOMIC_QUAD_LDD_PHYS;
off = (vm_offset_t)tsb_kernel_phys;
}
PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_1, tsb_kernel_phys);
PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_end_1,
tsb_kernel_phys + tsb_kernel_size - 1);
PATCH_ASI(tl1_dmmu_miss_patch_asi_1, asi);
PATCH_LDD(tl1_dmmu_miss_patch_quad_ldd_1, ldd);
PATCH_TSB(tl1_dmmu_miss_patch_tsb_1, off);
PATCH_TSB(tl1_dmmu_miss_patch_tsb_2, off);
PATCH_TSB(tl1_dmmu_miss_patch_tsb_mask_1, tsb_kernel_mask);
PATCH_TSB(tl1_dmmu_miss_patch_tsb_mask_2, tsb_kernel_mask);
PATCH_ASI(tl1_dmmu_prot_patch_asi_1, asi);
PATCH_LDD(tl1_dmmu_prot_patch_quad_ldd_1, ldd);
PATCH_TSB(tl1_dmmu_prot_patch_tsb_1, off);
PATCH_TSB(tl1_dmmu_prot_patch_tsb_2, off);
PATCH_TSB(tl1_dmmu_prot_patch_tsb_mask_1, tsb_kernel_mask);
PATCH_TSB(tl1_dmmu_prot_patch_tsb_mask_2, tsb_kernel_mask);
PATCH_ASI(tl1_immu_miss_patch_asi_1, asi);
PATCH_LDD(tl1_immu_miss_patch_quad_ldd_1, ldd);
PATCH_TSB(tl1_immu_miss_patch_tsb_1, off);
PATCH_TSB(tl1_immu_miss_patch_tsb_2, off);
PATCH_TSB(tl1_immu_miss_patch_tsb_mask_1, tsb_kernel_mask);
PATCH_TSB(tl1_immu_miss_patch_tsb_mask_2, tsb_kernel_mask);
/*
* Enter fake 8k pages for the 4MB kernel pages, so that
@ -442,9 +531,10 @@ pmap_bootstrap(u_int cpu_impl)
va = kernel_tlbs[i].te_va;
for (off = 0; off < PAGE_SIZE_4M; off += PAGE_SIZE) {
tp = tsb_kvtotte(va + off);
tp->tte_vpn = TV_VPN(va + off, TS_8K);
tp->tte_data = TD_V | TD_8K | TD_PA(pa + off) |
TD_REF | TD_SW | TD_CP | TD_CV | TD_P | TD_W;
vpn = TV_VPN(va + off, TS_8K);
data = TD_V | TD_8K | TD_PA(pa + off) | TD_REF |
TD_SW | TD_CP | TD_CV | TD_P | TD_W;
pmap_bootstrap_set_tte(tp, vpn, data);
}
}
@ -485,9 +575,10 @@ pmap_bootstrap(u_int cpu_impl)
pa = kstack0_phys + i * PAGE_SIZE;
va = kstack0 + i * PAGE_SIZE;
tp = tsb_kvtotte(va);
tp->tte_vpn = TV_VPN(va, TS_8K);
tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW |
TD_CP | TD_CV | TD_P | TD_W;
vpn = TV_VPN(va, TS_8K);
data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW | TD_CP |
TD_CV | TD_P | TD_W;
pmap_bootstrap_set_tte(tp, vpn, data);
}
/*
@ -527,9 +618,8 @@ pmap_bootstrap(u_int cpu_impl)
off += PAGE_SIZE) {
va = translations[i].om_start + off;
tp = tsb_kvtotte(va);
tp->tte_vpn = TV_VPN(va, TS_8K);
tp->tte_data =
((translations[i].om_tte &
vpn = TV_VPN(va, TS_8K);
data = ((translations[i].om_tte &
~((TD_SOFT2_MASK << TD_SOFT2_SHIFT) |
(cpu_impl >= CPU_IMPL_ULTRASPARCI &&
cpu_impl < CPU_IMPL_ULTRASPARCIII ?
@ -537,6 +627,7 @@ pmap_bootstrap(u_int cpu_impl)
(TD_RSVD_CH_MASK << TD_RSVD_CH_SHIFT)) |
(TD_SOFT_MASK << TD_SOFT_SHIFT))) | TD_EXEC) +
off;
pmap_bootstrap_set_tte(tp, vpn, data);
}
}
@ -571,20 +662,17 @@ pmap_bootstrap(u_int cpu_impl)
tlb_flush_nonlocked();
}
/*
* Map the 4MB kernel TSB pages.
*/
void
pmap_map_tsb(void)
{
vm_offset_t va;
vm_paddr_t pa;
u_long data;
register_t s;
int i;
s = intr_disable();
/*
* Map the 4MB TSB pages.
*/
for (i = 0; i < tsb_kernel_size; i += PAGE_SIZE_4M) {
va = (vm_offset_t)tsb_kernel + i;
pa = tsb_kernel_phys + i;
@ -594,16 +682,19 @@ pmap_map_tsb(void)
TLB_TAR_CTX(TLB_CTX_KERNEL));
stxa_sync(0, ASI_DTLB_DATA_IN_REG, data);
}
}
/*
* Set the secondary context to be the kernel context (needed for FP block
* operations in the kernel).
*/
void
pmap_set_kctx(void)
{
/*
* Set the secondary context to be the kernel context (needed for
* FP block operations in the kernel).
*/
stxa(AA_DMMU_SCXR, ASI_DMMU, (ldxa(AA_DMMU_SCXR, ASI_DMMU) &
TLB_CXR_PGSZ_MASK) | TLB_CTX_KERNEL);
flush(KERNBASE);
intr_restore(s);
}
/*
@ -628,6 +719,27 @@ pmap_bootstrap_alloc(vm_size_t size, uint32_t colors)
panic("pmap_bootstrap_alloc");
}
/*
* Set a TTE. This function is intended as a helper when tsb_kernel is
* direct-mapped but we haven't taken over the trap table, yet, as it's the
* case when we are taking advantage of ASI_ATOMIC_QUAD_LDD_PHYS to access
* the kernel TSB.
*/
void
pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data)
{
if (tsb_kernel_ldd_phys == 0) {
tp->tte_vpn = vpn;
tp->tte_data = data;
} else {
stxa((vm_paddr_t)tp + offsetof(struct tte, tte_vpn),
ASI_PHYS_USE_EC, vpn);
stxa((vm_paddr_t)tp + offsetof(struct tte, tte_data),
ASI_PHYS_USE_EC, data);
}
}
/*
* Initialize a vm_page's machine-dependent fields.
*/


@ -26,9 +26,11 @@
* SUCH DAMAGE.
*
* from BSDI: pmap.c,v 1.28.2.15 2000/04/27 03:10:31 cp Exp
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_ddb.h"
#include "opt_pmap.h"
@ -43,7 +45,7 @@
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
@ -78,6 +80,8 @@ struct tte *tsb_kernel;
vm_size_t tsb_kernel_mask;
vm_size_t tsb_kernel_size;
vm_paddr_t tsb_kernel_phys;
vm_paddr_t tsb_kernel_phys_end;
u_int tsb_kernel_ldd_phys;
struct tte *
tsb_tte_lookup(pmap_t pm, vm_offset_t va)