Implement a direct mapped address region, like alpha and ia64. This basically maps all of physical memory 1:1 to a range of virtual addresses outside of normal kva.

The advantage of doing this instead of accessing physical addresses directly is that memory accesses go through the data cache and participate in the normal cache coherency algorithm for invalidating lines in our own and in other cpus' data caches, so we don't have to flush the cache manually or send IPIs to do so on other cpus. Also, since the mappings never change, we don't have to flush them from the tlb manually. This makes pmap_copy_page and pmap_zero_page MP safe, allowing the idle zero proc to run outside of Giant.

Inspired by:	ia64
parent 11ae409d07
commit 30bbe52432
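As a quick illustration of the address encoding introduced here, a small standalone C sketch (not part of the commit) of how pmap_map_direct() in the pmap.c hunk below composes a direct-mapped virtual address from a physical page address and its data cache color, using the new TLB_DIRECT_* constants; the sample physical address and the helper name map_direct are made up.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Constants as added to machine/tlb.h by this change. */
#define TLB_DIRECT_MASK			(((1UL << (64 - 38)) - 1) << 38)
#define TLB_DIRECT_SHIFT		(3)
#define TLB_DIRECT_UNCACHEABLE_SHIFT	(11)
#define TLB_DIRECT_COLOR_SHIFT		(10)
#define TLB_DIRECT_UNCACHEABLE		(1 << TLB_DIRECT_UNCACHEABLE_SHIFT)

/*
 * Mirrors pmap_map_direct(): color == -1 marks an uncacheable page,
 * otherwise the dcache color is encoded in the low bits.  The whole
 * value is shifted left so the miss handler can recover it with a
 * single arithmetic right shift.
 */
static uint64_t
map_direct(uint64_t pa, int color)
{
	uint64_t va;

	if (color == -1)
		va = TLB_DIRECT_MASK | pa | TLB_DIRECT_UNCACHEABLE;
	else
		va = TLB_DIRECT_MASK | pa |
		    ((uint64_t)color << TLB_DIRECT_COLOR_SHIFT);
	return (va << TLB_DIRECT_SHIFT);
}

int
main(void)
{
	uint64_t pa = 0x12340000UL;	/* made-up, page-aligned physical address */

	printf("cacheable, color 1: va=%#" PRIx64 "\n", map_direct(pa, 1));
	printf("uncacheable:        va=%#" PRIx64 "\n", map_direct(pa, -1));
	return (0);
}

The high bit of the result is always set, which is what the brlz check in the new tl1_dmmu_miss_direct handler keys on; a single arithmetic right shift by TLB_DIRECT_SHIFT then recovers the physical address and the cache bits.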
@@ -61,6 +61,7 @@ typedef struct pmap *pmap_t;
struct md_page {
	STAILQ_HEAD(, tte) tte_list;
	int colors[DCACHE_COLORS];
	int color;
	int flags;
};
@@ -29,6 +29,12 @@
#ifndef _MACHINE_TLB_H_
#define _MACHINE_TLB_H_

#define TLB_DIRECT_MASK (((1UL << (64 - 38)) - 1) << 38)
#define TLB_DIRECT_SHIFT (3)
#define TLB_DIRECT_UNCACHEABLE_SHIFT (11)
#define TLB_DIRECT_COLOR_SHIFT (10)
#define TLB_DIRECT_UNCACHEABLE (1 << TLB_DIRECT_UNCACHEABLE_SHIFT)

#define TLB_DAR_SLOT_SHIFT (3)
#define TLB_DAR_SLOT(slot) ((slot) << TLB_DAR_SLOT_SHIFT)
@@ -1434,17 +1434,24 @@ END(tl1_immu_miss_trap)

	/*
	 * Extract the context from the contents of the tag access register.
	 * If its non-zero this is a fault on a user address, otherwise get
	 * the virtual page number.
	 * If its non-zero this is a fault on a user address. Note that the
	 * faulting address is passed in %g2.
	 */
	sllx %g6, 64 - TAR_VPN_SHIFT, %g5
	brnz,a,pn %g5, tl1_dmmu_miss_user
	 mov %g6, %g2

	/*
	 * Check for the direct mapped physical region. These addresses have
	 * the high bit set so they are negative.
	 */
	brlz,pn %g6, tl1_dmmu_miss_direct
	 EMPTY

	/*
	 * Find the index into the kernel tsb.
	 */
	set TSB_KERNEL_MASK, %g4
	set TSB_KERNEL_MASK, %g4
	srlx %g6, TAR_VPN_SHIFT, %g6
	and %g6, %g4, %g3

@@ -1506,6 +1513,47 @@ ENTRY(tl1_dmmu_miss_trap)
	mov T_DATA_MISS | T_KERNEL, %o0
END(tl1_dmmu_miss_trap)

ENTRY(tl1_dmmu_miss_direct)
#if KTR_COMPILE & KTR_TRAP
	CATR(KTR_TRAP, "tl1_dmmu_miss_direct: pc=%#lx sp=%#lx tar=%#lx"
	    , %g1, %g2, %g3, 7, 8, 9)
	rdpr %tpc, %g2
	stx %g2, [%g1 + KTR_PARM1]
	add %sp, SPOFF, %g2
	stx %g2, [%g1 + KTR_PARM2]
	stx %g6, [%g1 + KTR_PARM3]
9:
#endif

	/*
	 * Check the cache bits in the virtual address to see if this mapping
	 * is virtually cacheable. We set this up so that the masks fit in
	 * immediates... Note that the arithmetic shift sign extends, keeping
	 * all the top bits set.
	 */
	srax %g6, TLB_DIRECT_SHIFT, %g6
	andcc %g6, TLB_DIRECT_UNCACHEABLE, %g0
	mov TD_CP | TD_CV | TD_W, %g1
	movnz %xcc, TD_CP | TD_W, %g1

	/*
	 * Mask off the high bits of the virtual address to get the physical
	 * address, and or in the tte bits. The high bit is left set in the
	 * physical address, which corresponds to the tte valid bit, so that
	 * we don't have to include it in the tte bits. We ignore the cache
	 * bits, since they get shifted into the soft tte bits anyway.
	 */
	setx TLB_DIRECT_MASK & ~TD_V, %g3, %g2
	andn %g6, %g2, %g3
	or %g3, %g1, %g3

	/*
	 * Load the tte data into the TLB and retry the instruction.
	 */
	stxa %g3, [%g0] ASI_DTLB_DATA_IN_REG
	retry
END(tl1_dmmu_miss_direct)

ENTRY(tl1_dmmu_miss_user)
	/*
	 * Try a fast inline lookup of the user tsb.
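For reference, a rough standalone C model (not from the commit) of what the new tl1_dmmu_miss_direct fast path above computes when it builds the TLB data. The TD_* bit values below are assumed to follow the usual UltraSPARC TTE-data layout (the real definitions live in machine/tte.h), so treat them as illustrative.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Constants from the machine/tlb.h hunk above. */
#define TLB_DIRECT_MASK		(((1UL << (64 - 38)) - 1) << 38)
#define TLB_DIRECT_SHIFT	(3)
#define TLB_DIRECT_UNCACHEABLE	(1 << 11)

/* TTE data bits; assumed UltraSPARC positions, for illustration only. */
#define TD_V	(1UL << 63)	/* valid */
#define TD_CP	(1UL << 5)	/* cacheable in physical caches */
#define TD_CV	(1UL << 4)	/* cacheable in the virtual dcache */
#define TD_W	(1UL << 1)	/* writable */

/* Build the TTE data loaded for a direct-map miss on va. */
static uint64_t
direct_miss_tte(uint64_t va)
{
	int64_t sva;
	uint64_t bits;

	/* srax: arithmetic shift sign extends, keeping the top bits set. */
	sva = (int64_t)va >> TLB_DIRECT_SHIFT;

	/* andcc/mov/movnz: leave out TD_CV for uncacheable mappings. */
	bits = (sva & TLB_DIRECT_UNCACHEABLE) != 0 ?
	    (TD_CP | TD_W) : (TD_CP | TD_CV | TD_W);

	/*
	 * setx/andn/or: clear the direct-map bits except bit 63, which
	 * doubles as the TTE valid bit, then or in the protection bits.
	 * The color/uncacheable bits stay behind in the soft TTE bits.
	 */
	return (((uint64_t)sva & ~(TLB_DIRECT_MASK & ~TD_V)) | bits);
}

int
main(void)
{
	/* Direct-map va for a made-up physical address, cacheable, color 1. */
	uint64_t va = (TLB_DIRECT_MASK | 0x12340000UL | (1UL << 10)) <<
	    TLB_DIRECT_SHIFT;

	printf("tte=%#" PRIx64 "\n", direct_miss_tte(va));
	return (0);
}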
@@ -30,6 +30,7 @@
#include <sys/param.h>
#include <sys/assym.h>
#include <sys/errno.h>
#include <sys/gmon.h>
#include <sys/ktr.h>
#include <sys/proc.h>
#include <sys/queue.h>
@@ -89,10 +90,14 @@ ASSYM(LSU_VW, LSU_VW);

ASSYM(TAR_VPN_SHIFT, TAR_VPN_SHIFT);

ASSYM(TLB_DAR_SLOT_SHIFT, TLB_DAR_SLOT_SHIFT);
ASSYM(TLB_DEMAP_NUCLEUS, TLB_DEMAP_NUCLEUS);
ASSYM(TLB_DEMAP_PRIMARY, TLB_DEMAP_PRIMARY);
ASSYM(TLB_DEMAP_CONTEXT, TLB_DEMAP_CONTEXT);
ASSYM(TLB_DEMAP_PAGE, TLB_DEMAP_PAGE);
ASSYM(TLB_DIRECT_MASK, TLB_DIRECT_MASK);
ASSYM(TLB_DIRECT_UNCACHEABLE, TLB_DIRECT_UNCACHEABLE);
ASSYM(TLB_DIRECT_SHIFT, TLB_DIRECT_SHIFT);

ASSYM(TSB_BUCKET_ADDRESS_BITS, TSB_BUCKET_ADDRESS_BITS);
ASSYM(TSB_BUCKET_SHIFT, TSB_BUCKET_SHIFT);
@@ -130,6 +135,10 @@ ASSYM(IC_LINESIZE, offsetof(struct cacheinfo, ic_linesize));

ASSYM(ICA_PA, offsetof(struct ipi_cache_args, ica_pa));

ASSYM(GMON_PROF_OFF, GMON_PROF_OFF);
ASSYM(GMON_PROF_HIRES, GMON_PROF_HIRES);
ASSYM(GM_STATE, offsetof(struct gmonparam, state));

ASSYM(KTR_PROC, KTR_PROC);
ASSYM(KTR_TRAP, KTR_TRAP);
ASSYM(KTR_SMP, KTR_SMP);
@@ -164,6 +173,10 @@ ASSYM(TTE_SHIFT, TTE_SHIFT);
ASSYM(TD_EXEC, TD_EXEC);
ASSYM(TD_REF, TD_REF);
ASSYM(TD_SW, TD_SW);
ASSYM(TD_V, TD_V);
ASSYM(TD_8K, TD_8K);
ASSYM(TD_CP, TD_CP);
ASSYM(TD_CV, TD_CV);
ASSYM(TD_L, TD_L);
ASSYM(TD_W, TD_W);
@@ -158,6 +158,8 @@ static boolean_t pmap_initialized = FALSE;
 */
static vm_offset_t pmap_bootstrap_alloc(vm_size_t size);

static vm_offset_t pmap_map_direct(vm_page_t m);

/*
 * If user pmap is processed with pmap_remove and with pmap_remove and the
 * resident count drops to 0, there are no more pages to remove, so we
@@ -518,6 +520,7 @@ pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
		m = &vm_page_array[i];
		STAILQ_INIT(&m->md.tte_list);
		m->md.flags = 0;
		m->md.color = 0;
	}

	for (i = 0; i < translations_size; i++) {
@@ -581,21 +584,29 @@ int
pmap_cache_enter(vm_page_t m, vm_offset_t va)
{
	struct tte *tp;
	int color;
	int c, i;

	CTR2(KTR_PMAP, "pmap_cache_enter: m=%p va=%#lx", m, va);
	PMAP_STATS_INC(pmap_ncache_enter);
	for (i = 0, c = 0; i < DCACHE_COLORS; i++) {
		if (i != DCACHE_COLOR(va) && m->md.colors[i] != 0)
	color = DCACHE_COLOR(va);
	m->md.colors[color]++;
	if (m->md.color == color) {
		CTR0(KTR_PMAP, "pmap_cache_enter: cacheable");
		return (1);
	}
	for (c = 0, i = 0; i < DCACHE_COLORS; i++) {
		if (m->md.colors[i] != 0)
			c++;
	}
	m->md.colors[DCACHE_COLOR(va)]++;
	if (c == 0) {
	if (c == 1) {
		m->md.color = color;
		dcache_page_inval(VM_PAGE_TO_PHYS(m));
		CTR0(KTR_PMAP, "pmap_cache_enter: cacheable");
		return (1);
	}
	PMAP_STATS_INC(pmap_ncache_enter_nc);
	if ((m->md.flags & PG_UNCACHEABLE) != 0) {
	if (m->md.color == -1) {
		CTR0(KTR_PMAP, "pmap_cache_enter: already uncacheable");
		return (0);
	}
@@ -605,7 +616,7 @@ pmap_cache_enter(vm_page_t m, vm_offset_t va)
		tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
	}
	dcache_page_inval(VM_PAGE_TO_PHYS(m));
	m->md.flags |= PG_UNCACHEABLE;
	m->md.color = -1;
	return (0);
}

@@ -613,6 +624,7 @@ void
pmap_cache_remove(vm_page_t m, vm_offset_t va)
{
	struct tte *tp;
	int color;
	int c, i;

	CTR3(KTR_PMAP, "pmap_cache_remove: m=%p va=%#lx c=%d", m, va,
@@ -620,18 +632,21 @@ pmap_cache_remove(vm_page_t m, vm_offset_t va)
	KASSERT(m->md.colors[DCACHE_COLOR(va)] > 0,
	    ("pmap_cache_remove: no mappings %d <= 0",
	    m->md.colors[DCACHE_COLOR(va)]));
	m->md.colors[DCACHE_COLOR(va)]--;
	for (i = 0, c = 0; i < DCACHE_COLORS; i++) {
	color = DCACHE_COLOR(va);
	m->md.colors[color]--;
	if (m->md.color != -1 || m->md.colors[color] != 0)
		return;
	for (c = 0, i = 0; i < DCACHE_COLORS; i++) {
		if (m->md.colors[i] != 0)
			c++;
	}
	if (c > 1 || (m->md.flags & PG_UNCACHEABLE) == 0)
	if (c > 1)
		return;
	STAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
		tp->tte_data |= TD_CV;
		tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
	}
	m->md.flags &= ~PG_UNCACHEABLE;
	m->md.color = color;
}

/*
@@ -760,6 +775,21 @@ pmap_map(vm_offset_t *virt, vm_offset_t pa_start, vm_offset_t pa_end, int prot)
	return (sva);
}

static vm_offset_t
pmap_map_direct(vm_page_t m)
{
	vm_offset_t pa;
	vm_offset_t va;

	pa = VM_PAGE_TO_PHYS(m);
	if (m->md.color == -1)
		va = TLB_DIRECT_MASK | pa | TLB_DIRECT_UNCACHEABLE;
	else
		va = TLB_DIRECT_MASK | pa |
		    (m->md.color << TLB_DIRECT_COLOR_SHIFT);
	return (va << TLB_DIRECT_SHIFT);
}

/*
 * Map a list of wired pages into kernel virtual address space. This is
 * intended for temporary mappings which do not need page modification or
@@ -1412,41 +1442,35 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
void
pmap_zero_page(vm_page_t m)
{
	vm_offset_t pa;
	vm_offset_t va;

	pa = VM_PAGE_TO_PHYS(m);
	CTR1(KTR_PMAP, "pmap_zero_page: pa=%#lx", pa);
	dcache_page_inval(pa);
	aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
	va = pmap_map_direct(m);
	CTR2(KTR_PMAP, "pmap_zero_page: pa=%#lx va=%#lx",
	    VM_PAGE_TO_PHYS(m), va);
	bzero((void *)va, PAGE_SIZE);
}

void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
	vm_offset_t pa;
	vm_offset_t va;

	pa = VM_PAGE_TO_PHYS(m);
	CTR3(KTR_PMAP, "pmap_zero_page_area: pa=%#lx off=%#x size=%#x",
	    pa, off, size);
	KASSERT(off + size <= PAGE_SIZE, ("pmap_zero_page_area: bad off/size"));
	dcache_page_inval(pa);
	aszero(ASI_PHYS_USE_EC, pa + off, size);
	va = pmap_map_direct(m);
	CTR4(KTR_PMAP, "pmap_zero_page_area: pa=%#lx va=%#lx off=%#x size=%#x",
	    VM_PAGE_TO_PHYS(m), va, off, size);
	bzero((void *)(va + off), size);
}

void
pmap_zero_page_idle(vm_page_t m)
{
	vm_offset_t pa = VM_PAGE_TO_PHYS(m);
	vm_offset_t va;

	CTR1(KTR_PMAP, "pmap_zero_page_idle: pa=%#lx", pa);
#ifdef SMP
	mtx_lock(&Giant);
#endif
	dcache_inval_phys(pa, pa + PAGE_SIZE - 1);
#ifdef SMP
	mtx_unlock(&Giant);
#endif
	aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
	va = pmap_map_direct(m);
	CTR2(KTR_PMAP, "pmap_zero_page_idle: pa=%#lx va=%#lx",
	    VM_PAGE_TO_PHYS(m), va);
	bzero((void *)va, PAGE_SIZE);
}

/*
@@ -1458,11 +1482,11 @@ pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
	vm_offset_t dst;
	vm_offset_t src;

	dst = VM_PAGE_TO_PHYS(mdst);
	src = VM_PAGE_TO_PHYS(msrc);
	CTR2(KTR_PMAP, "pmap_copy_page: src=%#lx dst=%#lx", src, dst);
	dcache_page_inval(dst);
	ascopy(ASI_PHYS_USE_EC, src, dst, PAGE_SIZE);
	src = pmap_map_direct(msrc);
	dst = pmap_map_direct(mdst);
	CTR4(KTR_PMAP, "pmap_zero_page: src=%#lx va=%#lx dst=%#lx va=%#lx",
	    VM_PAGE_TO_PHYS(msrc), src, VM_PAGE_TO_PHYS(mdst), dst);
	bcopy((void *)src, (void *)dst, PAGE_SIZE);
}

/*