From a106c6930a32ce9e34c75ce416f309d222f06a5e Mon Sep 17 00:00:00 2001
From: Jake Burkholder
Date: Mon, 23 Dec 2002 23:39:57 +0000
Subject: [PATCH] - Change the way the direct mapped region is implemented to
 be generally useful for accessing more than 1 page of contiguous physical
 memory, and to use 4mb tlb entries instead of 8k. This requires that the
 system only use the direct mapped addresses when they have the same virtual
 colour as all other mappings of the same page, instead of being able to
 choose the colour and cachability of the mapping.
 - Adapt the physical page copying and zeroing functions to account for not
 being able to choose the colour or cachability of the direct mapped address.
 This adds a lot more cases to handle. Basically when a page has a different
 colour than its direct mapped address we have a choice between bypassing the
 data cache and using physical addresses directly, which requires a cache
 flush, or mapping it at the right colour, which requires a tlb flush. For
 now we choose to map the page and do the tlb flush. This will allow the
 direct mapped addresses to be used for more things that don't require normal
 pmap handling, including mapping the vm_page structures, the message buffer,
 temporary mappings for crash dumps, and will provide greater benefit for
 implementing uma_small_alloc, due to the much greater tlb coverage.

---
 sys/sparc64/include/tlb.h       |  17 +++-
 sys/sparc64/include/vmparam.h   | 103 +++++++++++++------
 sys/sparc64/sparc64/exception.S |  25 ++---
 sys/sparc64/sparc64/genassym.c  |   6 +-
 sys/sparc64/sparc64/pmap.c      | 175 +++++++++++++++++++++++---------
 5 files changed, 219 insertions(+), 107 deletions(-)

diff --git a/sys/sparc64/include/tlb.h b/sys/sparc64/include/tlb.h
index 6cffc1cfa169..d68a0ad4c2d8 100644
--- a/sys/sparc64/include/tlb.h
+++ b/sys/sparc64/include/tlb.h
@@ -29,11 +29,18 @@
 #ifndef _MACHINE_TLB_H_
 #define _MACHINE_TLB_H_
 
-#define TLB_DIRECT_MASK (((1UL << (64 - 38)) - 1) << 38)
-#define TLB_DIRECT_SHIFT (3)
-#define TLB_DIRECT_UNCACHEABLE_SHIFT (11)
-#define TLB_DIRECT_COLOR_SHIFT (10)
-#define TLB_DIRECT_UNCACHEABLE (1 << TLB_DIRECT_UNCACHEABLE_SHIFT)
+#define TLB_DIRECT_ADDRESS_BITS (43)
+#define TLB_DIRECT_PAGE_BITS (PAGE_SHIFT_4M)
+
+#define TLB_DIRECT_ADDRESS_MASK ((1UL << TLB_DIRECT_ADDRESS_BITS) - 1)
+#define TLB_DIRECT_PAGE_MASK ((1UL << TLB_DIRECT_PAGE_BITS) - 1)
+
+#define TLB_PHYS_TO_DIRECT(pa) \
+	((pa) | VM_MIN_DIRECT_ADDRESS)
+#define TLB_DIRECT_TO_PHYS(va) \
+	((va) & TLB_DIRECT_ADDRESS_MASK)
+#define TLB_DIRECT_TO_TTE_MASK \
+	(TD_V | TD_4M | (TLB_DIRECT_ADDRESS_MASK - TLB_DIRECT_PAGE_MASK))
 
 #define TLB_DAR_SLOT_SHIFT (3)
 #define TLB_DAR_SLOT(slot) ((slot) << TLB_DAR_SLOT_SHIFT)
diff --git a/sys/sparc64/include/vmparam.h b/sys/sparc64/include/vmparam.h
index 7b916bc952fa..910a8a6d9fa0 100644
--- a/sys/sparc64/include/vmparam.h
+++ b/sys/sparc64/include/vmparam.h
@@ -78,26 +78,81 @@
 #define MAXSLP 20
 
 /*
- * Highest user address. Also address of initial user stack. This is
- * arbitrary, neither the structure or size of the user page table (tsb)
- * nor the location or size of the kernel virtual address space have any
- * bearing on what we use for user addresses. We want something relatively
- * high to give a large address space, but we also have to take the out of
- * range va hole into account. So we pick an address just before the start
- * of the hole, which gives a user address space of just under 8TB. Note
- * that if this moves above the va hole, we will have to deal with sign
- * extension of virtual addresses.
+ * Address space layout.
+ *
+ * UltraSPARC I and II implement a 44 bit virtual address space. The address
+ * space is split into 2 regions at each end of the 64 bit address space, with
+ * an out of range "hole" in the middle. UltraSPARC III implements the full
+ * 64 bit virtual address space, but we don't really have any use for it and
+ * 43 bits of user address space is considered to be "enough", so we ignore it.
+ *
+ * Upper region:  0xffffffffffffffff
+ *                0xfffff80000000000
+ *
+ * Hole:          0xfffff7ffffffffff
+ *                0x0000080000000000
+ *
+ * Lower region:  0x000007ffffffffff
+ *                0x0000000000000000
+ *
+ * In general we ignore the upper region, and use the lower region as mappable
+ * space.
+ *
+ * We define some interesting address constants:
+ *
+ * VM_MIN_ADDRESS and VM_MAX_ADDRESS define the start and end of the entire 64 bit
+ * address space, mostly just for convenience.
+ *
+ * VM_MIN_DIRECT_ADDRESS and VM_MAX_DIRECT_ADDRESS define the start and end
+ * of the direct mapped region. This maps virtual addresses to physical
+ * addresses directly using 4mb tlb entries, with the physical address encoded
+ * in the lower 43 bits of virtual address. These mappings are convenient
+ * because they do not require page tables, and because they never change they
+ * do not require tlb flushes. However, since these mappings are cacheable,
+ * we must ensure that all pages accessed this way are either not double
+ * mapped, or that all other mappings have virtual color equal to physical
+ * color, in order to avoid creating illegal aliases in the data cache.
+ *
+ * VM_MIN_KERNEL_ADDRESS and VM_MAX_KERNEL_ADDRESS define the start and end of
+ * mappable kernel virtual address space. VM_MIN_KERNEL_ADDRESS is basically
+ * arbitrary, a convenient address is chosen which allows both the kernel text
+ * and data and the prom's address space to be mapped with 1 4mb tsb page.
+ * VM_MAX_KERNEL_ADDRESS is variable, computed at startup time based on the
+ * amount of physical memory available. Each 4mb tsb page provides 1g of
+ * virtual address space, with the only practical limit being available
+ * physical memory.
+ *
+ * VM_MIN_PROM_ADDRESS and VM_MAX_PROM_ADDRESS define the start and end of the
+ * prom address space. On startup the prom's mappings are duplicated in the
+ * kernel tsb, to allow prom memory to be accessed normally by the kernel.
+ *
+ * VM_MIN_USER_ADDRESS and VM_MAX_USER_ADDRESS define the start and end of the
+ * user address space. There are some hardware errata about using addresses
+ * at the boundary of the va hole, so we allow just under 43 bits of user
+ * address space. Note that the kernel and user address spaces overlap, but
+ * this doesn't matter because they use different tlb contexts, and because
+ * the kernel address space is not mapped into each process' address space.
  */
-#define VM_MAXUSER_ADDRESS (0x7fe00000000UL)
+#define VM_MIN_ADDRESS (0x0000000000000000UL)
+#define VM_MAX_ADDRESS (0xffffffffffffffffUL)
 
-#define VM_MIN_ADDRESS (0UL)
-#define VM_MAX_ADDRESS (VM_MAXUSER_ADDRESS)
+#define VM_MIN_DIRECT_ADDRESS (0xfffff80000000000UL)
+#define VM_MAX_DIRECT_ADDRESS (VM_MAX_ADDRESS)
 
-/*
- * Initial user stack address for 64 bit processes. Should be highest user
- * virtual address.
- */
-#define USRSTACK VM_MAXUSER_ADDRESS
+#define VM_MIN_KERNEL_ADDRESS (0x00000000c0000000UL)
+#define VM_MAX_KERNEL_ADDRESS (vm_max_kernel_address)
+
+#define VM_MIN_PROM_ADDRESS (0x00000000f0000000UL)
+#define VM_MAX_PROM_ADDRESS (0x00000000ffffe000UL)
+
+#define VM_MIN_USER_ADDRESS (0x0000000000000000UL)
+#define VM_MAX_USER_ADDRESS (0x000007fe00000000UL)
+
+#define VM_MINUSER_ADDRESS (VM_MIN_USER_ADDRESS)
+#define VM_MAXUSER_ADDRESS (VM_MAX_USER_ADDRESS)
+
+#define KERNBASE (VM_MIN_KERNEL_ADDRESS)
+#define USRSTACK (VM_MAX_USER_ADDRESS)
 
 /*
  * Virtual size (bytes) for various kernel submaps.
@@ -115,20 +170,6 @@
 #define VM_KMEM_SIZE_SCALE (3)
 #endif
 
-/*
- * Lowest kernel virtual address, where the kernel is loaded. This is also
- * arbitrary. We pick a resonably low address, which allows all of kernel
- * text, data and bss to be below the 4 gigabyte mark, yet still high enough
- * to cover the prom addresses with 1 tsb page. This also happens to be the
- * same as for x86 with default KVA_PAGES...
- */
-#define VM_MIN_KERNEL_ADDRESS (0xc0000000)
-#define VM_MIN_PROM_ADDRESS (0xf0000000)
-#define VM_MAX_PROM_ADDRESS (0xffffe000)
-
-#define KERNBASE (VM_MIN_KERNEL_ADDRESS)
-#define VM_MAX_KERNEL_ADDRESS (vm_max_kernel_address)
-
 /*
  * Initial pagein size of beginning of executable file.
  */
diff --git a/sys/sparc64/sparc64/exception.S b/sys/sparc64/sparc64/exception.S
index fa0f64047582..9a9418e5543f 100644
--- a/sys/sparc64/sparc64/exception.S
+++ b/sys/sparc64/sparc64/exception.S
@@ -1443,27 +1443,16 @@ ENTRY(tl1_dmmu_miss_trap)
 END(tl1_dmmu_miss_trap)
 
 ENTRY(tl1_dmmu_miss_direct)
-	/*
-	 * Check the cache bits in the virtual address to see if this mapping
-	 * is virtually cacheable. We set this up so that the masks fit in
-	 * immediates... Note that the arithmetic shift sign extends, keeping
-	 * all the top bits set.
-	 */
-	srax	%g5, TLB_DIRECT_SHIFT, %g5
-	andcc	%g5, TLB_DIRECT_UNCACHEABLE, %g0
-	mov	TD_CP | TD_CV | TD_W, %g6
-	movnz	%xcc, TD_CP | TD_W, %g6
-	or	%g5, %g6, %g5
-
 	/*
 	 * Mask off the high bits of the virtual address to get the physical
-	 * address, and or in the tte bits. The high bit is left set in the
-	 * physical address, which corresponds to the tte valid bit, so that
-	 * we don't have to include it in the tte bits. We ignore the cache
-	 * bits, since they get shifted into the soft tte bits anyway.
+	 * address, and or in the tte bits. The virtual address bits that
+	 * correspond to the tte valid and page size bits are left set, so
+	 * they don't have to be included in the tte bits below. We know they
+	 * are set because the virtual address is in the upper va hole.
	 */
-	setx	TLB_DIRECT_MASK & ~TD_V, %g7, %g6
-	andn	%g5, %g6, %g5
+	setx	TLB_DIRECT_TO_TTE_MASK, %g7, %g6
+	and	%g5, %g6, %g5
+	or	%g5, TD_CP | TD_CV | TD_W, %g5
 
	/*
	 * Load the tte data into the TLB and retry the instruction.
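
The rewritten tl1_dmmu_miss_direct handler is easier to follow in C. The user-space sketch below is not part of the patch; it mirrors the new three-instruction sequence, masking the direct mapped virtual address with TLB_DIRECT_TO_TTE_MASK and then or'ing in the cache and writable bits. The TD_V and TD_4M values are assumptions about the UltraSPARC tte layout (valid bit at 63, page size field at bits 62:61), and the soft bits are placeholders; the real definitions live in tte.h and are not shown in this diff.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT_4M           22
#define VM_MIN_DIRECT_ADDRESS   0xfffff80000000000UL
#define TLB_DIRECT_ADDRESS_BITS 43
#define TLB_DIRECT_ADDRESS_MASK ((1UL << TLB_DIRECT_ADDRESS_BITS) - 1)
#define TLB_DIRECT_PAGE_MASK    ((1UL << PAGE_SHIFT_4M) - 1)

#define TD_V    (1UL << 63)     /* assumed: tte valid bit */
#define TD_4M   (3UL << 61)     /* assumed: page size field set to 4mb */
#define TD_CP   (1UL << 5)      /* placeholder cache/writable soft bits; */
#define TD_CV   (1UL << 4)      /* the real values come from tte.h       */
#define TD_W    (1UL << 1)

#define TLB_DIRECT_TO_TTE_MASK \
	(TD_V | TD_4M | (TLB_DIRECT_ADDRESS_MASK - TLB_DIRECT_PAGE_MASK))

int
main(void)
{
	unsigned long pa = 0x12345400000UL;             /* example pa, 4mb aligned */
	unsigned long va = pa | VM_MIN_DIRECT_ADDRESS;  /* TLB_PHYS_TO_DIRECT */
	unsigned long tte;

	/*
	 * Because va sits in the upper va hole, bits 63 through 43 are all
	 * ones, so masking with TLB_DIRECT_TO_TTE_MASK leaves TD_V and TD_4M
	 * already set and keeps the 4mb page number bits of the physical
	 * address; only the cache and writable bits still need to be or'd
	 * in, exactly as the new assembly does.
	 */
	tte = (va & TLB_DIRECT_TO_TTE_MASK) | TD_CP | TD_CV | TD_W;
	printf("va %#lx -> tte data %#lx\n", va, tte);
	return (0);
}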
diff --git a/sys/sparc64/sparc64/genassym.c b/sys/sparc64/sparc64/genassym.c
index aaf39c8dc557..ea70b0ce248a 100644
--- a/sys/sparc64/sparc64/genassym.c
+++ b/sys/sparc64/sparc64/genassym.c
@@ -33,6 +33,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -93,9 +95,7 @@ ASSYM(TLB_DEMAP_NUCLEUS, TLB_DEMAP_NUCLEUS);
 ASSYM(TLB_DEMAP_PRIMARY, TLB_DEMAP_PRIMARY);
 ASSYM(TLB_DEMAP_CONTEXT, TLB_DEMAP_CONTEXT);
 ASSYM(TLB_DEMAP_PAGE, TLB_DEMAP_PAGE);
-ASSYM(TLB_DIRECT_MASK, TLB_DIRECT_MASK);
-ASSYM(TLB_DIRECT_UNCACHEABLE, TLB_DIRECT_UNCACHEABLE);
-ASSYM(TLB_DIRECT_SHIFT, TLB_DIRECT_SHIFT);
+ASSYM(TLB_DIRECT_TO_TTE_MASK, TLB_DIRECT_TO_TTE_MASK);
 
 ASSYM(TSB_BUCKET_MASK, TSB_BUCKET_MASK);
 ASSYM(TSB_BUCKET_SHIFT, TSB_BUCKET_SHIFT);
diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c
index 4e57d77c09b8..b9945ecc8146 100644
--- a/sys/sparc64/sparc64/pmap.c
+++ b/sys/sparc64/sparc64/pmap.c
@@ -132,6 +132,10 @@ int sparc64_nmemreg;
 static struct ofw_map translations[128];
 static int translations_size;
 
+static vm_offset_t pmap_idle_map;
+static vm_offset_t pmap_temp_map_1;
+static vm_offset_t pmap_temp_map_2;
+
 /*
  * First and last available kernel virtual addresses.
  */
@@ -153,8 +157,6 @@ struct pmap kernel_pmap_store;
  */
 static vm_offset_t pmap_bootstrap_alloc(vm_size_t size);
 
-static vm_offset_t pmap_map_direct(vm_page_t m);
-
 extern int tl1_immu_miss_patch_1[];
 extern int tl1_immu_miss_patch_2[];
 extern int tl1_dmmu_miss_patch_1[];
@@ -384,6 +386,16 @@ pmap_bootstrap(vm_offset_t ekva)
 	virtual_end = vm_max_kernel_address;
 	kernel_vm_end = vm_max_kernel_address;
 
+	/*
+	 * Allocate kva space for temporary mappings.
+	 */
+	pmap_idle_map = virtual_avail;
+	virtual_avail += PAGE_SIZE * DCACHE_COLORS;
+	pmap_temp_map_1 = virtual_avail;
+	virtual_avail += PAGE_SIZE * DCACHE_COLORS;
+	pmap_temp_map_2 = virtual_avail;
+	virtual_avail += PAGE_SIZE * DCACHE_COLORS;
+
 	/*
	 * Allocate virtual address space for the message buffer.
	 */
@@ -499,9 +511,9 @@ pmap_map_tsb(void)
 	for (i = 0; i < tsb_kernel_size; i += PAGE_SIZE_4M) {
 		va = (vm_offset_t)tsb_kernel + i;
 		pa = tsb_kernel_phys + i;
-		/* XXX - cheetah */
 		data = TD_V | TD_4M | TD_PA(pa) | TD_L | TD_CP | TD_CV |
 		    TD_P | TD_W;
+		/* XXX - cheetah */
 		stxa(AA_DMMU_TAR, ASI_DMMU, TLB_TAR_VA(va) |
 		    TLB_TAR_CTX(TLB_CTX_KERNEL));
 		stxa_sync(0, ASI_DTLB_DATA_IN_REG, data);
@@ -648,6 +660,8 @@ pmap_kextract(vm_offset_t va)
 {
 	struct tte *tp;
 
+	if (va >= VM_MIN_DIRECT_ADDRESS)
+		return (TLB_DIRECT_TO_PHYS(va));
 	tp = tsb_kvtotte(va);
 	if ((tp->tte_data & TD_V) == 0)
 		return (0);
@@ -880,8 +894,7 @@ pmap_kremove_flags(vm_offset_t va)
  * The value passed in *virt is a suggested virtual address for the mapping.
  * Architectures which can support a direct-mapped physical to virtual region
  * can return the appropriate address within that region, leaving '*virt'
- * unchanged. We cannot and therefore do not; *virt is updated with the
- * first usable address after the mapped region.
+ * unchanged.
  */
 vm_offset_t
 pmap_map(vm_offset_t *virt, vm_offset_t pa_start, vm_offset_t pa_end, int prot)
@@ -905,26 +918,6 @@ pmap_map(vm_offset_t *virt, vm_offset_t pa_start, vm_offset_t pa_end, int prot)
 	return (sva);
 }
 
-static vm_offset_t
-pmap_map_direct(vm_page_t m)
-{
-	vm_offset_t pa;
-	vm_offset_t va;
-
-	pa = VM_PAGE_TO_PHYS(m);
-	if (m->md.color == -1) {
-		KASSERT(m->md.colors[0] != 0 && m->md.colors[1] != 0,
-		    ("pmap_map_direct: non-cacheable, only 1 color"));
-		va = TLB_DIRECT_MASK | pa | TLB_DIRECT_UNCACHEABLE;
-	} else {
-		KASSERT(m->md.colors[DCACHE_OTHER_COLOR(m->md.color)] == 0,
-		    ("pmap_map_direct: cacheable, mappings of other color"));
-		va = TLB_DIRECT_MASK | pa |
-		    (m->md.color << TLB_DIRECT_COLOR_SHIFT);
-	}
-	return (va << TLB_DIRECT_SHIFT);
-}
-
 /*
  * Map a list of wired pages into kernel virtual address space. This is
  * intended for temporary mappings which do not need page modification or
@@ -1599,57 +1592,139 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
 	}
 }
 
-/*
- * Zero a page of physical memory by temporarily mapping it into the tlb.
- */
 void
 pmap_zero_page(vm_page_t m)
 {
+	vm_offset_t pa;
 	vm_offset_t va;
+	struct tte *tp;
 
-	va = pmap_map_direct(m);
-	CTR2(KTR_PMAP, "pmap_zero_page: pa=%#lx va=%#lx",
-	    VM_PAGE_TO_PHYS(m), va);
-	bzero((void *)va, PAGE_SIZE);
+	pa = VM_PAGE_TO_PHYS(m);
+	if (m->md.color == -1)
+		aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
+	else if (m->md.color == DCACHE_COLOR(pa)) {
+		va = TLB_PHYS_TO_DIRECT(pa);
+		bzero((void *)va, PAGE_SIZE);
+	} else {
+		va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
+		tp = tsb_kvtotte(va);
+		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
+		tp->tte_vpn = TV_VPN(va, TS_8K);
+		bzero((void *)va, PAGE_SIZE);
+		tlb_page_demap(kernel_pmap, va);
+	}
 }
 
 void
 pmap_zero_page_area(vm_page_t m, int off, int size)
 {
+	vm_offset_t pa;
 	vm_offset_t va;
+	struct tte *tp;
 
 	KASSERT(off + size <= PAGE_SIZE, ("pmap_zero_page_area: bad off/size"));
 
-	va = pmap_map_direct(m);
-	CTR4(KTR_PMAP, "pmap_zero_page_area: pa=%#lx va=%#lx off=%#x size=%#x",
-	    VM_PAGE_TO_PHYS(m), va, off, size);
-	bzero((void *)(va + off), size);
+	pa = VM_PAGE_TO_PHYS(m);
+	if (m->md.color == -1)
+		aszero(ASI_PHYS_USE_EC, pa + off, size);
+	else if (m->md.color == DCACHE_COLOR(pa)) {
+		va = TLB_PHYS_TO_DIRECT(pa);
+		bzero((void *)(va + off), size);
+	} else {
+		va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
+		tp = tsb_kvtotte(va);
+		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
+		tp->tte_vpn = TV_VPN(va, TS_8K);
+		bzero((void *)(va + off), size);
+		tlb_page_demap(kernel_pmap, va);
+	}
 }
 
 void
 pmap_zero_page_idle(vm_page_t m)
 {
+	vm_offset_t pa;
 	vm_offset_t va;
+	struct tte *tp;
 
-	va = pmap_map_direct(m);
-	CTR2(KTR_PMAP, "pmap_zero_page_idle: pa=%#lx va=%#lx",
-	    VM_PAGE_TO_PHYS(m), va);
-	bzero((void *)va, PAGE_SIZE);
+	pa = VM_PAGE_TO_PHYS(m);
+	if (m->md.color == -1)
+		aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
+	else if (m->md.color == DCACHE_COLOR(pa)) {
+		va = TLB_PHYS_TO_DIRECT(pa);
+		bzero((void *)va, PAGE_SIZE);
+	} else {
+		va = pmap_idle_map + (m->md.color * PAGE_SIZE);
+		tp = tsb_kvtotte(va);
+		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
+		tp->tte_vpn = TV_VPN(va, TS_8K);
+		bzero((void *)va, PAGE_SIZE);
+		tlb_page_demap(kernel_pmap, va);
+	}
 }
 
-/*
- * Copy a page of physical memory by temporarily mapping it into the tlb.
- */
 void
 pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
 {
-	vm_offset_t dst;
-	vm_offset_t src;
+	vm_offset_t pdst;
+	vm_offset_t psrc;
+	vm_offset_t vdst;
+	vm_offset_t vsrc;
+	struct tte *tp;
 
-	src = pmap_map_direct(msrc);
-	dst = pmap_map_direct(mdst);
-	CTR4(KTR_PMAP, "pmap_zero_page: src=%#lx va=%#lx dst=%#lx va=%#lx",
-	    VM_PAGE_TO_PHYS(msrc), src, VM_PAGE_TO_PHYS(mdst), dst);
-	bcopy((void *)src, (void *)dst, PAGE_SIZE);
+	pdst = VM_PAGE_TO_PHYS(mdst);
+	psrc = VM_PAGE_TO_PHYS(msrc);
+	if (msrc->md.color == -1 && mdst->md.color == -1)
+		ascopy(ASI_PHYS_USE_EC, psrc, pdst, PAGE_SIZE);
+	else if (msrc->md.color == DCACHE_COLOR(psrc) &&
+	    mdst->md.color == DCACHE_COLOR(pdst)) {
+		vdst = TLB_PHYS_TO_DIRECT(pdst);
+		vsrc = TLB_PHYS_TO_DIRECT(psrc);
+		bcopy((void *)vsrc, (void *)vdst, PAGE_SIZE);
+	} else if (msrc->md.color == -1) {
+		if (mdst->md.color == DCACHE_COLOR(pdst)) {
+			vdst = TLB_PHYS_TO_DIRECT(pdst);
+			ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
+			    PAGE_SIZE);
+		} else {
+			vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
+			tp = tsb_kvtotte(vdst);
+			tp->tte_data =
+			    TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
+			tp->tte_vpn = TV_VPN(vdst, TS_8K);
+			ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
+			    PAGE_SIZE);
+			tlb_page_demap(kernel_pmap, vdst);
+		}
+	} else if (mdst->md.color == -1) {
+		if (msrc->md.color == DCACHE_COLOR(psrc)) {
+			vsrc = TLB_PHYS_TO_DIRECT(psrc);
+			ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
+			    PAGE_SIZE);
+		} else {
+			vsrc = pmap_temp_map_1 + (msrc->md.color * PAGE_SIZE);
+			tp = tsb_kvtotte(vsrc);
+			tp->tte_data =
+			    TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
+			tp->tte_vpn = TV_VPN(vsrc, TS_8K);
+			ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
+			    PAGE_SIZE);
+			tlb_page_demap(kernel_pmap, vsrc);
		}
+	} else {
+		vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
+		tp = tsb_kvtotte(vdst);
+		tp->tte_data =
+		    TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
+		tp->tte_vpn = TV_VPN(vdst, TS_8K);
+		vsrc = pmap_temp_map_2 + (msrc->md.color * PAGE_SIZE);
+		tp = tsb_kvtotte(vsrc);
+		tp->tte_data =
+		    TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
+		tp->tte_vpn = TV_VPN(vsrc, TS_8K);
+		bcopy((void *)vsrc, (void *)vdst, PAGE_SIZE);
+		tlb_page_demap(kernel_pmap, vdst);
+		tlb_page_demap(kernel_pmap, vsrc);
+	}
 }
 
 /*
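
Beyond the diff itself, a few small user-space sketches may help illustrate how the new direct mapped region is meant to be used. First, the encoding in tlb.h: a minimal sketch, assuming only that physical addresses fit in 43 bits and using the constants visible in the diff. pmap_kextract()'s new early return performs exactly this TLB_DIRECT_TO_PHYS() step for any kernel virtual address at or above VM_MIN_DIRECT_ADDRESS.

#include <assert.h>
#include <stdint.h>

#define VM_MIN_DIRECT_ADDRESS   0xfffff80000000000UL
#define TLB_DIRECT_ADDRESS_BITS 43
#define TLB_DIRECT_ADDRESS_MASK ((1UL << TLB_DIRECT_ADDRESS_BITS) - 1)

#define TLB_PHYS_TO_DIRECT(pa)  ((pa) | VM_MIN_DIRECT_ADDRESS)
#define TLB_DIRECT_TO_PHYS(va)  ((va) & TLB_DIRECT_ADDRESS_MASK)

int
main(void)
{
	unsigned long pa = 0x00000007c0012000UL;   /* any pa below 2^43 */
	unsigned long va = TLB_PHYS_TO_DIRECT(pa);

	/* The mapping is 1:1 and reversible, so no page table is needed. */
	assert(va >= VM_MIN_DIRECT_ADDRESS);
	assert(TLB_DIRECT_TO_PHYS(va) == pa);
	return (0);
}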
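
The constraint spelled out in the new vmparam.h comment, that a page may only be touched through the direct map when every other mapping of it has the same virtual colour, comes down to a single comparison against the colour implied by the physical address. The sketch below models it; the DCACHE_COLORS and PAGE_SHIFT values and the direct_map_ok() helper are assumptions for illustration. In the kernel the colour lives in the per-page md.color field, with -1 meaning the page is mapped uncacheable.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT      13                      /* 8k pages */
#define DCACHE_COLORS   2                       /* assumed */
#define DCACHE_COLOR(a) (((a) >> PAGE_SHIFT) & (DCACHE_COLORS - 1))

/*
 * m_color stands in for vm_page md.color: the virtual colour of the page's
 * existing mappings, or -1 if the page is mapped uncacheable.
 */
static bool
direct_map_ok(int m_color, uint64_t pa)
{
	/* The direct map is cacheable and its colour is fixed by pa. */
	return (m_color != -1 && m_color == (int)DCACHE_COLOR(pa));
}

int
main(void)
{
	printf("%d\n", direct_map_ok(1, 0x2000));  /* colours match */
	printf("%d\n", direct_map_ok(0, 0x2000));  /* mismatch: remap or bypass */
	return (0);
}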
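
pmap_bootstrap() now reserves DCACHE_COLORS pages of kva for each of pmap_idle_map, pmap_temp_map_1 and pmap_temp_map_2, so the zeroing and copying code can always pick the slot whose virtual colour matches the page being mapped. A sketch of the slot selection follows; the base address is hypothetical, standing in for the kva handed out at bootstrap, and is assumed to start at colour 0.

#include <stdio.h>

#define PAGE_SIZE       8192
#define DCACHE_COLORS   2                       /* assumed */

/* Hypothetical kva base with colour 0, in place of pmap_temp_map_1. */
static const unsigned long pmap_temp_map_1 = 0xc0400000UL;

int
main(void)
{
	int color;

	for (color = 0; color < DCACHE_COLORS; color++) {
		unsigned long va = pmap_temp_map_1 +
		    (unsigned long)color * PAGE_SIZE;
		/*
		 * va's virtual colour equals 'color', so mapping a page of
		 * that colour here cannot create an illegal alias in the
		 * data cache.
		 */
		printf("color %d -> temp va %#lx\n", color, va);
	}
	return (0);
}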
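
The new pmap_zero_page() (and its _area and _idle variants) chooses between three strategies instead of always using a direct mapping. The decision can be modelled compactly; the enum names and the zero_strategy() helper below are illustrative only, and the operations they stand for are the ones visible in the diff.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT      13
#define DCACHE_COLORS   2                       /* assumed */
#define DCACHE_COLOR(a) (((a) >> PAGE_SHIFT) & (DCACHE_COLORS - 1))

enum zero_strategy {
	ZERO_PHYS_ASI,   /* uncacheable page: aszero() via ASI_PHYS_USE_EC */
	ZERO_DIRECT,     /* colours match: bzero() through the direct map */
	ZERO_TEMP_MAP    /* colours differ: temp mapping + tlb_page_demap() */
};

static enum zero_strategy
zero_strategy(int m_color, uint64_t pa)
{
	if (m_color == -1)
		return (ZERO_PHYS_ASI);
	if (m_color == (int)DCACHE_COLOR(pa))
		return (ZERO_DIRECT);
	return (ZERO_TEMP_MAP);
}

int
main(void)
{
	printf("%d %d %d\n",
	    zero_strategy(-1, 0x4000),   /* ZERO_PHYS_ASI */
	    zero_strategy(0, 0x4000),    /* 0x4000 has colour 0: ZERO_DIRECT */
	    zero_strategy(1, 0x4000));   /* mismatch: ZERO_TEMP_MAP */
	return (0);
}

The _area and _idle variants differ only in the range zeroed and in using pmap_idle_map rather than pmap_temp_map_1 for the temporary mapping.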
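
pmap_copy_page() applies the same reasoning to each side of the copy independently, which is what produces the long chain of cases: an uncacheable side is accessed through ASI_PHYS_USE_EC (ascopy, ascopyfrom or ascopyto), a side whose colour matches its physical address goes through the direct map, and anything else gets an 8k mapping at the right colour followed by a tlb_page_demap(). A sketch of the per-side classification; the enum and the page_access() helper are illustrative.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT      13
#define DCACHE_COLORS   2                       /* assumed */
#define DCACHE_COLOR(a) (((a) >> PAGE_SHIFT) & (DCACHE_COLORS - 1))

/* How one side (source or destination) of the copy will be accessed. */
enum access {
	ACC_PHYS,    /* ascopy/ascopyfrom/ascopyto via ASI_PHYS_USE_EC */
	ACC_DIRECT,  /* through the 4mb direct map */
	ACC_TEMP     /* 8k temp mapping at the page's colour, then demap */
};

static enum access
page_access(int m_color, uint64_t pa)
{
	if (m_color == -1)
		return (ACC_PHYS);
	if (m_color == (int)DCACHE_COLOR(pa))
		return (ACC_DIRECT);
	return (ACC_TEMP);
}

int
main(void)
{
	/*
	 * Example: uncacheable source, destination at the wrong colour; the
	 * patch temp-maps the destination and ascopyfrom()s the source into
	 * it before demapping.
	 */
	printf("src=%d dst=%d\n",
	    page_access(-1, 0x6000), page_access(0, 0x6000));
	return (0);
}

When both sides need temporary mappings, the patch uses pmap_temp_map_1 for the destination and pmap_temp_map_2 for the source, and demaps both afterwards.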