Paul Saab c794ceb56a Implement write combining for crashdumps. This is useful when
write caching is disabled on both SCSI and IDE disks, where large
memory dumps could take up to an hour to complete.

Timing (in seconds) how long it took to complete a dump on an i386
SCSI-based system with 512MB of RAM gave the following results:

Before:				After:
	WCE           TIME		WCE           TIME
	------------------		------------------
	1	141.820972		1	 15.600111
	0	797.265072		0	 65.480465

Obtained from:	Yahoo!
Reviewed by:	peter
2000-10-17 10:05:49 +00:00

/*
* Copyright (c) 1991 Regents of the University of California.
* All rights reserved.
* Copyright (c) 1994 John S. Dyson
* All rights reserved.
* Copyright (c) 1994 David Greenman
* All rights reserved.
* Copyright (c) 1998 Doug Rabson
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
* Science Department and William Jolitz of UUNET Technologies Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
* from: i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
* with some ideas from NetBSD's alpha pmap
* $FreeBSD$
*/
/*
* Manages physical address maps.
*
* In addition to hardware address maps, this
* module is called upon to provide software-use-only
* maps which may or may not be stored in the same
* form as hardware maps. These pseudo-maps are
* used to store intermediate results from copy
* operations to and from address spaces.
*
* Since the information managed by this module is
* also stored by the logical address mapping module,
* this module may throw away valid virtual-to-physical
* mappings at almost any time. However, invalidations
* of virtual-to-physical mappings must be done as
* requested.
*
* In order to cope with hardware architectures which
* make virtual-to-physical map invalidates expensive,
* this module may delay invalidate or reduced protection
* operations until such time as they are actually
* necessary. This module is given full information as
* to which processors are currently using which maps,
* and to when physical maps must be made correct.
*/
/*
* Notes for alpha pmap.
*
* On alpha, pm_pteobj will hold the lev1, lev2 and lev3 page tables.
* Indices from 0 to NUSERLEV3MAPS-1 will map user lev3 page tables,
* indices from NUSERLEV3MAPS to NUSERLEV3MAPS+NUSERLEV2MAPS-1 will
* map user lev2 page tables and index NUSERLEV3MAPS+NUSERLEV2MAPS
* will map the lev1 page table. The lev1 table will self map at
* address VADDR(PTLEV1I,0,0).
*
* The vm_object kptobj holds the kernel page tables on i386 (62 or 63
* of them, depending on whether the system is SMP). On alpha, kptobj
* will hold the lev3 and lev2 page tables for K1SEG. Indices 0 to
* NKLEV3MAPS-1 will map kernel lev3 page tables and indices
* NKLEV3MAPS to NKLEV3MAPS+NKLEV2MAPS will map lev2 page tables. (XXX
* should the kernel Lev1map be inserted into this object?).
*
* pvtmmap is not needed for alpha since K0SEG maps all of physical
* memory.
*
*
* alpha virtual memory map:
*
*
* Address                                              Lev1 index
*
*                   ---------------------------------
* 0000000000000000  |                               |  0
*                   |                               |
*                   |                               |
*                   |                               |
*                   |                               |
*            ---                                   ---
*                        User space (USEG)
*            ---                                   ---
*                   |                               |
*                   |                               |
*                   |                               |
*                   |                               |
* 000003ffffffffff  |                               |  511=UMAXLEV1I
*                   ---------------------------------
* fffffc0000000000  |                               |  512=K0SEGLEV1I
*                   | Kernel code/data/bss          |
*                   |                               |
*                   |                               |
*                   |                               |
*            ---                                   ---
*                             K0SEG
*            ---                                   ---
*                   |                               |
*                   | 1-1 physical/virtual          |
*                   |                               |
*                   |                               |
* fffffdffffffffff  |                               |
*                   ---------------------------------
* fffffe0000000000  |                               |  768=K1SEGLEV1I
*                   | Kernel dynamic data           |
*                   |                               |
*                   |                               |
*                   |                               |
*            ---                                   ---
*                             K1SEG
*            ---                                   ---
*                   |                               |
*                   | mapped by ptes                |
*                   |                               |
*                   |                               |
* fffffff7ffffffff  |                               |
*                   ---------------------------------
* fffffffe00000000  |                               |  1023=PTLEV1I
*                   | PTmap (pte self map)          |
* ffffffffffffffff  |                               |
*                   ---------------------------------
*
*/
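/*
 * Illustrative sketch of the pindex layout described in the notes above:
 * how an index into a user pmap's pte object selects a page-table level.
 * The helper name and return values here are hypothetical; the real code
 * tests the same boundaries inline, e.g. in _pmap_allocpte() and
 * pmap_release_free_page() below.
 */
#if 0
static int
pmap_pindex_level(unsigned pindex)
{
	if (pindex < NUSERLEV3MAPS)
		return 3;	/* user level 3 page table page */
	else if (pindex < NUSERLEV3MAPS + NUSERLEV2MAPS)
		return 2;	/* user level 2 page table page */
	else
		return 1;	/* the single level 1 page */
}
#endif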
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/msgbuf.h>
#include <sys/vmmeter.h>
#include <sys/mman.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_zone.h>
#include <sys/user.h>
#include <machine/md_var.h>
#include <machine/rpb.h>
#include <machine/smp.h>
#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
#endif
#if defined(DIAGNOSTIC)
#define PMAP_DIAGNOSTIC
#endif
#define MINPV 2048
#if 0
#define PMAP_DIAGNOSTIC
#define PMAP_DEBUG
#endif
#if !defined(PMAP_DIAGNOSTIC)
#define PMAP_INLINE __inline
#else
#define PMAP_INLINE
#endif
#if 0
static void
pmap_break(void)
{
}
/* #define PMAP_DEBUG_VA(va) if ((va) == 0x120058000) pmap_break(); else */
#endif
#ifndef PMAP_DEBUG_VA
#define PMAP_DEBUG_VA(va) do {} while(0)
#endif
/*
* Some macros for manipulating virtual addresses
*/
#define ALPHA_L1SIZE (1L << ALPHA_L1SHIFT)
#define ALPHA_L2SIZE (1L << ALPHA_L2SHIFT)
#define alpha_l1trunc(va) ((va) & ~(ALPHA_L1SIZE-1))
#define alpha_l2trunc(va) ((va) & ~(ALPHA_L2SIZE-1))
/*
* Get PDEs and PTEs for user/kernel address space
*/
#define pmap_pte_w(pte) ((*(pte) & PG_W) != 0)
#define pmap_pte_managed(pte) ((*(pte) & PG_MANAGED) != 0)
#define pmap_pte_v(pte) ((*(pte) & PG_V) != 0)
#define pmap_pte_pa(pte) alpha_ptob(ALPHA_PTE_TO_PFN(*(pte)))
#define pmap_pte_prot(pte) (*(pte) & PG_PROT)
#define pmap_pte_set_w(pte, v) ((v)?(*pte |= PG_W):(*pte &= ~PG_W))
#define pmap_pte_set_prot(pte, v) ((*pte &= ~PG_PROT), (*pte |= (v)))
/*
* Given a map and a machine independent protection code,
* convert to an alpha protection code.
*/
#define pte_prot(m, p) (protection_codes[m == pmap_kernel() ? 0 : 1][p])
int protection_codes[2][8];
/*
* Return non-zero if this pmap is currently active
*/
#define pmap_isactive(pmap) (pmap->pm_active)
/*
* Extract level 1, 2 and 3 page table indices from a va
*/
#define PTMASK ((1 << ALPHA_PTSHIFT) - 1)
#define pmap_lev1_index(va) (((va) >> ALPHA_L1SHIFT) & PTMASK)
#define pmap_lev2_index(va) (((va) >> ALPHA_L2SHIFT) & PTMASK)
#define pmap_lev3_index(va) (((va) >> ALPHA_L3SHIFT) & PTMASK)
/*
* Given a physical address, construct a pte
*/
#define pmap_phys_to_pte(pa) ALPHA_PTE_FROM_PFN(alpha_btop(pa))
/*
* Given a page frame number, construct a k0seg va
*/
#define pmap_k0seg_to_pfn(va) alpha_btop(ALPHA_K0SEG_TO_PHYS(va))
/*
* Given a pte, construct a k0seg va
*/
#define pmap_k0seg_to_pte(va) ALPHA_PTE_FROM_PFN(pmap_k0seg_to_pfn(va))
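/*
 * Worked example (illustrative sketch): with 8K pages (PAGE_SHIFT == 13)
 * and 10-bit page table indices (ALPHA_PTSHIFT == 10), the macros above
 * split a va into three 10-bit fields.  For the debug address 0x120058000
 * that appears in the commented-out PMAP_DEBUG_VA definition above:
 *
 *	pmap_lev1_index(0x120058000) == 0x000
 *	pmap_lev2_index(0x120058000) == 0x240
 *	pmap_lev3_index(0x120058000) == 0x02c
 */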
/*
* Lev1map:
*
* Kernel level 1 page table. This maps all kernel level 2
* page table pages, and is used as a template for all user
* pmap level 1 page tables. When a new user level 1 page
* table is allocated, all Lev1map PTEs for kernel addresses
* are copied to the new map.
*
* Lev2map:
*
* Initial set of kernel level 2 page table pages. These
* map the kernel level 3 page table pages. As kernel
* level 3 page table pages are added, more level 2 page
* table pages may be added to map them. These pages are
* never freed.
*
* Lev3map:
*
* Initial set of kernel level 3 page table pages. These
* map pages in K1SEG. More level 3 page table pages may
* be added at run-time if additional K1SEG address space
* is required. These pages are never freed.
*
* Lev2mapsize:
*
* Number of entries in the initial Lev2map.
*
* Lev3mapsize:
*
* Number of entries in the initial Lev3map.
*
* NOTE: When mappings are inserted into the kernel pmap, all
* level 2 and level 3 page table pages must already be allocated
* and mapped into the parent page table.
*/
pt_entry_t *Lev1map, *Lev2map, *Lev3map;
vm_size_t Lev2mapsize, Lev3mapsize;
/*
* Statically allocated kernel pmap
*/
static struct pmap kernel_pmap_store;
pmap_t kernel_pmap;
vm_offset_t avail_start; /* PA of first available physical page */
vm_offset_t avail_end; /* PA of last available physical page */
vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */
static vm_object_t kptobj;
static int nklev3, nklev2;
vm_offset_t kernel_vm_end;
/*
* Data for the ASN allocator
*/
static int pmap_maxasn;
static pmap_t pmap_active[MAXCPU];
/*
* Data for the pv entry allocation mechanism
*/
static vm_zone_t pvzone;
static struct vm_zone pvzone_store;
static struct vm_object pvzone_obj;
static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0;
static int pmap_pagedaemon_waken = 0;
static struct pv_entry *pvinit;
static PMAP_INLINE void free_pv_entry __P((pv_entry_t pv));
static pv_entry_t get_pv_entry __P((void));
static void alpha_protection_init __P((void));
static void pmap_changebit __P((vm_page_t m, int bit, boolean_t setem));
static void pmap_remove_all __P((vm_page_t m));
static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_page_t mpte));
static int pmap_remove_pte __P((pmap_t pmap, pt_entry_t* ptq, vm_offset_t sva));
static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va));
static int pmap_remove_entry __P((struct pmap *pmap, vm_page_t m,
vm_offset_t va));
static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va,
vm_page_t mpte, vm_page_t m));
static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va));
static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p));
static vm_page_t _pmap_allocpte __P((pmap_t pmap, unsigned ptepindex));
static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex));
static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t));
/*
* Routine: pmap_lev1pte
* Function:
* Extract the level 1 page table entry associated
* with the given map/virtual_address pair.
*/
static PMAP_INLINE pt_entry_t*
pmap_lev1pte(pmap_t pmap, vm_offset_t va)
{
if (!pmap)
return 0;
return &pmap->pm_lev1[pmap_lev1_index(va)];
}
/*
* Routine: pmap_lev2pte
* Function:
* Extract the level 2 page table entry associated
* with the given map/virtual_address pair.
*/
static PMAP_INLINE pt_entry_t*
pmap_lev2pte(pmap_t pmap, vm_offset_t va)
{
pt_entry_t* l1pte;
pt_entry_t* l2map;
l1pte = pmap_lev1pte(pmap, va);
if (!pmap_pte_v(l1pte))
return 0;
l2map = (pt_entry_t*) ALPHA_PHYS_TO_K0SEG(pmap_pte_pa(l1pte));
return &l2map[pmap_lev2_index(va)];
}
/*
* Routine: pmap_lev3pte
* Function:
* Extract the level 3 page table entry associated
* with the given map/virtual_address pair.
*/
static PMAP_INLINE pt_entry_t*
pmap_lev3pte(pmap_t pmap, vm_offset_t va)
{
pt_entry_t* l2pte;
pt_entry_t* l3map;
l2pte = pmap_lev2pte(pmap, va);
if (!l2pte || !pmap_pte_v(l2pte))
return 0;
l3map = (pt_entry_t*) ALPHA_PHYS_TO_K0SEG(pmap_pte_pa(l2pte));
return &l3map[pmap_lev3_index(va)];
}
vm_offset_t
pmap_steal_memory(vm_size_t size)
{
vm_size_t bank_size;
vm_offset_t pa, va;
size = round_page(size);
bank_size = phys_avail[1] - phys_avail[0];
while (size > bank_size) {
int i;
for (i = 0; phys_avail[i+2]; i+= 2) {
phys_avail[i] = phys_avail[i+2];
phys_avail[i+1] = phys_avail[i+3];
}
phys_avail[i] = 0;
phys_avail[i+1] = 0;
if (!phys_avail[0])
panic("pmap_steal_memory: out of memory");
bank_size = phys_avail[1] - phys_avail[0];
}
pa = phys_avail[0];
phys_avail[0] += size;
va = ALPHA_PHYS_TO_K0SEG(pa);
bzero((caddr_t) va, size);
return va;
}
extern pt_entry_t rom_pte; /* XXX */
extern int prom_mapped; /* XXX */
/*
* Bootstrap the system enough to run with virtual memory.
*/
void
pmap_bootstrap(vm_offset_t ptaddr, u_int maxasn)
{
pt_entry_t newpte;
int i;
/*
* Setup ASNs. PCPU_GET(next_asn) and PCPU_GET(current_asngen) are set
* up already.
*/
pmap_maxasn = maxasn;
/*
* Allocate a level 1 map for the kernel.
*/
Lev1map = (pt_entry_t*) pmap_steal_memory(PAGE_SIZE);
/*
* Allocate a level 2 map for the kernel
*/
Lev2map = (pt_entry_t*) pmap_steal_memory(PAGE_SIZE);
Lev2mapsize = PAGE_SIZE;
/*
* Allocate some level 3 maps for the kernel
*/
Lev3map = (pt_entry_t*) pmap_steal_memory(PAGE_SIZE*NKPT);
Lev3mapsize = NKPT * PAGE_SIZE;
/* Map all of the level 2 maps */
for (i = 0; i < howmany(Lev2mapsize, PAGE_SIZE); i++) {
unsigned long pfn =
pmap_k0seg_to_pfn((vm_offset_t) Lev2map) + i;
newpte = ALPHA_PTE_FROM_PFN(pfn);
newpte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_W;
Lev1map[K1SEGLEV1I + i] = newpte;
}
/* Setup the mapping for the prom console */
{
if (pmap_uses_prom_console()) {
/* XXX save old pte so that we can remap prom if necessary */
rom_pte = *(pt_entry_t *)ptaddr & ~PG_ASM; /* XXX */
}
prom_mapped = 0;
/*
* Actually, this code lies. The prom is still mapped, and will
* remain so until the context switch after alpha_init() returns.
* Printfs using the firmware before then will end up frobbing
* Lev1map unnecessarily, but that's OK.
*/
}
/*
* Level 1 self mapping.
*
* Don't set PG_ASM since the self-mapping is different for each
* address space.
*/
newpte = pmap_k0seg_to_pte((vm_offset_t) Lev1map);
newpte |= PG_V | PG_KRE | PG_KWE;
Lev1map[PTLEV1I] = newpte;
/* Map all of the level 3 maps */
for (i = 0; i < howmany(Lev3mapsize, PAGE_SIZE); i++) {
unsigned long pfn =
pmap_k0seg_to_pfn((vm_offset_t) Lev3map) + i;
newpte = ALPHA_PTE_FROM_PFN(pfn);
newpte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_W;
Lev2map[i] = newpte;
}
avail_start = phys_avail[0];
for (i = 0; phys_avail[i+2]; i+= 2) ;
avail_end = phys_avail[i+1];
virtual_avail = VM_MIN_KERNEL_ADDRESS;
virtual_end = VPTBASE;
/*
* Initialize protection array.
*/
alpha_protection_init();
/*
* The kernel's pmap is statically allocated so we don't have to use
* pmap_create, which is unlikely to work correctly at this part of
* the boot sequence (XXX and which no longer exists).
*/
kernel_pmap = &kernel_pmap_store;
kernel_pmap->pm_lev1 = Lev1map;
kernel_pmap->pm_count = 1;
kernel_pmap->pm_active = ~0;
kernel_pmap->pm_asn[alpha_pal_whami()].asn = 0;
kernel_pmap->pm_asn[alpha_pal_whami()].gen = 1;
TAILQ_INIT(&kernel_pmap->pm_pvlist);
nklev3 = NKPT;
nklev2 = 1;
/*
* Set up proc0's PCB such that the ptbr points to the right place
* and has the kernel pmap's.
*/
proc0.p_addr->u_pcb.pcb_hw.apcb_ptbr =
ALPHA_K0SEG_TO_PHYS((vm_offset_t)Lev1map) >> PAGE_SHIFT;
proc0.p_addr->u_pcb.pcb_hw.apcb_asn = 0;
}
int
pmap_uses_prom_console()
{
int cputype;
cputype = hwrpb->rpb_type;
return (cputype == ST_DEC_21000 || cputype == ST_DEC_4100
|| cputype == ST_DEC_3000_300
|| cputype == ST_DEC_3000_500);
}
/*
* Initialize the pmap module.
* Called by vm_init, to initialize any structures that the pmap
* system needs to map virtual memory.
* pmap_init has been enhanced to support, in a fairly consistent
* way, discontiguous physical memory.
*/
void
pmap_init(phys_start, phys_end)
vm_offset_t phys_start, phys_end;
{
int i;
int initial_pvs;
/*
* Allocate memory for random pmap data structures. Includes the
* pv_head_table.
*/
for(i = 0; i < vm_page_array_size; i++) {
vm_page_t m;
m = &vm_page_array[i];
TAILQ_INIT(&m->md.pv_list);
m->md.pv_list_count = 0;
m->md.pv_flags = 0;
}
/*
* init the pv free list
*/
initial_pvs = vm_page_array_size;
if (initial_pvs < MINPV)
initial_pvs = MINPV;
pvzone = &pvzone_store;
pvinit = (struct pv_entry *) kmem_alloc(kernel_map,
initial_pvs * sizeof (struct pv_entry));
zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit,
vm_page_array_size);
/*
* object for kernel page table pages
*/
kptobj = vm_object_allocate(OBJT_DEFAULT, NKLEV3MAPS + NKLEV2MAPS);
/*
* Now it is safe to enable pv_table recording.
*/
pmap_initialized = TRUE;
}
/*
* Initialize the address space (zone) for the pv_entries. Set a
* high water mark so that the system can recover from excessive
* numbers of pv entries.
*/
void
pmap_init2()
{
pv_entry_max = PMAP_SHPGPERPROC * maxproc + vm_page_array_size;
pv_entry_high_water = 9 * (pv_entry_max / 10);
zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
}
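/*
 * Sizing example (illustrative, assumes the default PMAP_SHPGPERPROC of
 * 200): with maxproc == 512 and a vm_page_array_size of 65536 pages
 * (512MB of 8K pages), pv_entry_max is 200 * 512 + 65536 == 167936
 * entries, and pv_entry_high_water is 9 * (167936 / 10) == 151137.
 */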
/***************************************************
* Manipulate TLBs for a pmap
***************************************************/
static void
pmap_invalidate_asn(pmap_t pmap)
{
pmap->pm_asn[PCPU_GET(cpuno)].gen = 0;
}
struct pmap_invalidate_page_arg {
pmap_t pmap;
vm_offset_t va;
};
static void
pmap_invalidate_page_action(void *arg)
{
pmap_t pmap = ((struct pmap_invalidate_page_arg *) arg)->pmap;
vm_offset_t va = ((struct pmap_invalidate_page_arg *) arg)->va;
if (pmap->pm_active & (1 << PCPU_GET(cpuno))) {
ALPHA_TBIS(va);
alpha_pal_imb(); /* XXX overkill? */
} else {
pmap_invalidate_asn(pmap);
}
}
static void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
struct pmap_invalidate_page_arg arg;
arg.pmap = pmap;
arg.va = va;
smp_rendezvous(0, pmap_invalidate_page_action, 0, (void *) &arg);
}
static void
pmap_invalidate_all_action(void *arg)
{
pmap_t pmap = (pmap_t) arg;
if (pmap->pm_active & (1 << PCPU_GET(cpuno))) {
ALPHA_TBIA();
alpha_pal_imb(); /* XXX overkill? */
} else
pmap_invalidate_asn(pmap);
}
static void
pmap_invalidate_all(pmap_t pmap)
{
smp_rendezvous(0, pmap_invalidate_all_action, 0, (void *) pmap);
}
static void
pmap_get_asn(pmap_t pmap)
{
if (pmap->pm_asn[PCPU_GET(cpuno)].gen != PCPU_GET(current_asngen)) {
if (PCPU_GET(next_asn) > pmap_maxasn) {
/*
* Start a new ASN generation.
*
* Invalidate all per-process mappings and I-cache
*/
PCPU_GET(next_asn) = 0;
PCPU_GET(current_asngen)++;
PCPU_GET(current_asngen) &= (1 << 24) - 1;
if (PCPU_GET(current_asngen) == 0) {
/*
* Clear the pm_asn[].gen of all pmaps.
* This is safe since it is only called from
* pmap_activate after it has deactivated
* the old pmap and it only affects this cpu.
*/
struct proc *p;
pmap_t tpmap;
#ifdef PMAP_DIAGNOSTIC
printf("pmap_get_asn: generation rollover\n");
#endif
PCPU_GET(current_asngen) = 1;
LIST_FOREACH(p, &allproc, p_list) {
if (p->p_vmspace) {
tpmap = vmspace_pmap(p->p_vmspace);
tpmap->pm_asn[PCPU_GET(cpuno)].gen = 0;
}
}
}
/*
* Since we are about to start re-using ASNs, we must
* clear out the TLB and the I-cache since they are tagged
* with the ASN.
*/
ALPHA_TBIAP();
alpha_pal_imb(); /* XXX overkill? */
}
pmap->pm_asn[PCPU_GET(cpuno)].asn = PCPU_GET(next_asn)++;
pmap->pm_asn[PCPU_GET(cpuno)].gen = PCPU_GET(current_asngen);
}
}
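/*
 * Minimal sketch (illustrative, not the actual pmap_activate()) of how
 * the ASN state above is consumed when a pmap is switched onto a cpu:
 * pmap_get_asn() is a no-op while the per-cpu generation still matches
 * current_asngen, and otherwise allocates a fresh ASN before it is
 * loaded into the PCB.
 */
#if 0
static void
pmap_switch_asn_sketch(pmap_t pmap, struct pcb *pcb)
{
	pmap_get_asn(pmap);
	pcb->pcb_hw.apcb_asn = pmap->pm_asn[PCPU_GET(cpuno)].asn;
}
#endif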
/***************************************************
* Low level helper routines.....
***************************************************/
/*
* this routine defines the region(s) of memory that should
* not be tested for the modified bit.
*/
static PMAP_INLINE int
pmap_track_modified(vm_offset_t va)
{
if ((va < clean_sva) || (va >= clean_eva))
return 1;
else
return 0;
}
/*
* Routine: pmap_extract
* Function:
* Extract the physical page address associated
* with the given map/virtual_address pair.
*/
vm_offset_t
pmap_extract(pmap, va)
register pmap_t pmap;
vm_offset_t va;
{
pt_entry_t* pte = pmap_lev3pte(pmap, va);
if (pte)
return alpha_ptob(ALPHA_PTE_TO_PFN(*pte));
else
return 0;
}
/***************************************************
* Low level mapping routines.....
***************************************************/
/*
* Add a list of wired pages to the kva
* this routine is only used for temporary
* kernel mappings that do not need to have
* page modification or references recorded.
* Note that old mappings are simply written
* over. The page *must* be wired.
*/
void
pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
{
int i;
pt_entry_t *pte;
for (i = 0; i < count; i++) {
vm_offset_t tva = va + i * PAGE_SIZE;
pt_entry_t npte = pmap_phys_to_pte(VM_PAGE_TO_PHYS(m[i]))
| PG_ASM | PG_KRE | PG_KWE | PG_V;
pt_entry_t opte;
pte = vtopte(tva);
opte = *pte;
PMAP_DEBUG_VA(va);
*pte = npte;
if (opte)
pmap_invalidate_page(kernel_pmap, tva);
}
}
/*
* this routine jerks page mappings from the
* kernel -- it is meant only for temporary mappings.
*/
void
pmap_qremove(va, count)
vm_offset_t va;
int count;
{
int i;
register pt_entry_t *pte;
for (i = 0; i < count; i++) {
pte = vtopte(va);
PMAP_DEBUG_VA(va);
*pte = 0;
pmap_invalidate_page(kernel_pmap, va);
va += PAGE_SIZE;
}
}
/*
* add a wired page to the kva
* note that in order for the mapping to take effect -- you
* should do an invltlb after doing the pmap_kenter...
*/
PMAP_INLINE void
pmap_kenter(vm_offset_t va, vm_offset_t pa)
{
pt_entry_t *pte;
pt_entry_t npte, opte;
npte = pmap_phys_to_pte(pa) | PG_ASM | PG_KRE | PG_KWE | PG_V;
pte = vtopte(va);
opte = *pte;
PMAP_DEBUG_VA(va);
*pte = npte;
if (opte)
pmap_invalidate_page(kernel_pmap, va);
}
/*
* remove a page from the kernel pagetables
*/
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
register pt_entry_t *pte;
pte = vtopte(va);
PMAP_DEBUG_VA(va);
*pte = 0;
pmap_invalidate_page(kernel_pmap, va);
}
/*
* Used to map a range of physical addresses into kernel
* virtual address space.
*
* For now, VM is already on, we only need to map the
* specified memory.
*/
vm_offset_t
pmap_map(vm_offset_t virt, vm_offset_t start, vm_offset_t end, int prot)
{
while (start < end) {
pmap_kenter(virt, start);
virt += PAGE_SIZE;
start += PAGE_SIZE;
}
return (virt);
}
static vm_page_t
pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
{
vm_page_t m;
retry:
m = vm_page_lookup(object, pindex);
if (m && vm_page_sleep_busy(m, FALSE, "pplookp"))
goto retry;
return m;
}
/*
* Create the UPAGES for a new process.
* This routine directly affects the fork perf for a process.
*/
void
pmap_new_proc(struct proc *p)
{
int i;
vm_object_t upobj;
vm_page_t m;
struct user *up;
pt_entry_t *ptek, oldpte;
/*
* allocate object for the upages
*/
if ((upobj = p->p_upages_obj) == NULL) {
upobj = vm_object_allocate( OBJT_DEFAULT, UPAGES);
p->p_upages_obj = upobj;
}
/* get a kernel virtual address for the UPAGES for this proc */
if ((up = p->p_addr) == NULL) {
up = (struct user *) kmem_alloc_nofault(kernel_map,
UPAGES * PAGE_SIZE);
if (up == NULL)
panic("pmap_new_proc: u_map allocation failed");
p->p_addr = up;
}
ptek = vtopte((vm_offset_t) up);
for(i=0;i<UPAGES;i++) {
/*
* Get a kernel stack page
*/
m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
/*
* Wire the page
*/
m->wire_count++;
cnt.v_wire_count++;
oldpte = *(ptek + i);
/*
* Enter the page into the kernel address space.
*/
*(ptek + i) = pmap_phys_to_pte(VM_PAGE_TO_PHYS(m))
| PG_ASM | PG_KRE | PG_KWE | PG_V;
if (oldpte)
pmap_invalidate_page(kernel_pmap,
(vm_offset_t)up + i * PAGE_SIZE);
vm_page_wakeup(m);
vm_page_flag_clear(m, PG_ZERO);
vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
m->valid = VM_PAGE_BITS_ALL;
}
}
/*
* Dispose the UPAGES for a process that has exited.
* This routine directly impacts the exit perf of a process.
*/
void
pmap_dispose_proc(p)
struct proc *p;
{
int i;
vm_object_t upobj;
vm_page_t m;
pt_entry_t *ptek, oldpte;
upobj = p->p_upages_obj;
ptek = vtopte((vm_offset_t) p->p_addr);
for(i=0;i<UPAGES;i++) {
if ((m = vm_page_lookup(upobj, i)) == NULL)
panic("pmap_dispose_proc: upage already missing???");
vm_page_busy(m);
oldpte = *(ptek + i);
*(ptek + i) = 0;
pmap_invalidate_page(kernel_pmap,
(vm_offset_t)p->p_addr + i * PAGE_SIZE);
vm_page_unwire(m, 0);
vm_page_free(m);
}
}
/*
* Allow the UPAGES for a process to be prejudicially paged out.
*/
void
pmap_swapout_proc(p)
struct proc *p;
{
int i;
vm_object_t upobj;
vm_page_t m;
/*
* Make sure we aren't fpcurproc.
*/
alpha_fpstate_save(p, 1);
upobj = p->p_upages_obj;
/*
* let the upages be paged
*/
for(i=0;i<UPAGES;i++) {
if ((m = vm_page_lookup(upobj, i)) == NULL)
panic("pmap_swapout_proc: upage already missing???");
vm_page_dirty(m);
vm_page_unwire(m, 0);
pmap_kremove((vm_offset_t)p->p_addr + PAGE_SIZE * i);
}
}
/*
* Bring the UPAGES for a specified process back in.
*/
void
pmap_swapin_proc(p)
struct proc *p;
{
int i,rv;
vm_object_t upobj;
vm_page_t m;
upobj = p->p_upages_obj;
for(i=0;i<UPAGES;i++) {
m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
pmap_kenter(((vm_offset_t) p->p_addr) + i * PAGE_SIZE,
VM_PAGE_TO_PHYS(m));
if (m->valid != VM_PAGE_BITS_ALL) {
rv = vm_pager_get_pages(upobj, &m, 1, 0);
if (rv != VM_PAGER_OK)
panic("pmap_swapin_proc: cannot get upages for proc: %d\n", p->p_pid);
m = vm_page_lookup(upobj, i);
m->valid = VM_PAGE_BITS_ALL;
}
vm_page_wire(m);
vm_page_wakeup(m);
vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
}
/*
* The pcb may be at a different physical address now so cache the
* new address.
*/
p->p_md.md_pcbpaddr = (void*) vtophys((vm_offset_t) &p->p_addr->u_pcb);
}
/***************************************************
* Page table page management routines.....
***************************************************/
/*
* This routine unholds page table pages, and if the hold count
* drops to zero, then it decrements the wire count.
*/
static int
_pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
while (vm_page_sleep_busy(m, FALSE, "pmuwpt"))
;
if (m->hold_count == 0) {
vm_offset_t pteva;
pt_entry_t* pte;
/*
* unmap the page table page
*/
if (m->pindex >= NUSERLEV3MAPS) {
/* Level 2 page table */
pte = pmap_lev1pte(pmap, va);
pteva = (vm_offset_t) PTlev2 + alpha_ptob(m->pindex - NUSERLEV3MAPS);
} else {
/* Level 3 page table */
pte = pmap_lev2pte(pmap, va);
pteva = (vm_offset_t) PTmap + alpha_ptob(m->pindex);
}
*pte = 0;
if (m->pindex < NUSERLEV3MAPS) {
/* unhold the level 2 page table */
vm_page_t lev2pg;
lev2pg = pmap_page_lookup(pmap->pm_pteobj,
NUSERLEV3MAPS + pmap_lev1_index(va));
vm_page_unhold(lev2pg);
if (lev2pg->hold_count == 0)
_pmap_unwire_pte_hold(pmap, va, lev2pg);
}
--pmap->pm_stats.resident_count;
/*
* Do a invltlb to make the invalidated mapping
* take effect immediately.
*/
pmap_invalidate_page(pmap, pteva);
if (pmap->pm_ptphint == m)
pmap->pm_ptphint = NULL;
/*
* If the page is finally unwired, simply free it.
*/
--m->wire_count;
if (m->wire_count == 0) {
if (m->flags & PG_WANTED) {
vm_page_flag_clear(m, PG_WANTED);
wakeup(m);
}
vm_page_busy(m);
vm_page_free_zero(m);
--cnt.v_wire_count;
}
return 1;
}
return 0;
}
static PMAP_INLINE int
pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
vm_page_unhold(m);
if (m->hold_count == 0)
return _pmap_unwire_pte_hold(pmap, va, m);
else
return 0;
}
/*
* After removing a page table entry, this routine is used to
* conditionally free the page, and manage the hold/wire counts.
*/
static int
pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
{
unsigned ptepindex;
if (va >= VM_MAXUSER_ADDRESS)
return 0;
if (mpte == NULL) {
ptepindex = (va >> ALPHA_L2SHIFT);
if (pmap->pm_ptphint &&
(pmap->pm_ptphint->pindex == ptepindex)) {
mpte = pmap->pm_ptphint;
} else {
mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
pmap->pm_ptphint = mpte;
}
}
return pmap_unwire_pte_hold(pmap, va, mpte);
}
void
pmap_pinit0(pmap)
struct pmap *pmap;
{
int i;
pmap->pm_lev1 = Lev1map;
pmap->pm_flags = 0;
pmap->pm_count = 1;
pmap->pm_ptphint = NULL;
pmap->pm_active = 0;
for (i = 0; i < MAXCPU; i++) {
pmap->pm_asn[i].asn = 0;
pmap->pm_asn[i].gen = 0;
}
TAILQ_INIT(&pmap->pm_pvlist);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
}
/*
* Initialize a preallocated and zeroed pmap structure,
* such as one in a vmspace structure.
*/
void
pmap_pinit(pmap)
register struct pmap *pmap;
{
vm_page_t lev1pg;
int i;
/*
* allocate object for the ptes
*/
if (pmap->pm_pteobj == NULL)
pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, NUSERLEV3MAPS + NUSERLEV2MAPS + 1);
/*
* allocate the page directory page
*/
lev1pg = vm_page_grab(pmap->pm_pteobj, NUSERLEV3MAPS + NUSERLEV2MAPS,
VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
lev1pg->wire_count = 1;
++cnt.v_wire_count;
vm_page_flag_clear(lev1pg, PG_MAPPED | PG_BUSY); /* not mapped normally */
lev1pg->valid = VM_PAGE_BITS_ALL;
pmap->pm_lev1 = (pt_entry_t*) ALPHA_PHYS_TO_K0SEG(VM_PAGE_TO_PHYS(lev1pg));
if ((lev1pg->flags & PG_ZERO) == 0)
bzero(pmap->pm_lev1, PAGE_SIZE);
/* install self-referential address mapping entry (not PG_ASM) */
pmap->pm_lev1[PTLEV1I] = pmap_phys_to_pte(VM_PAGE_TO_PHYS(lev1pg))
| PG_V | PG_KRE | PG_KWE;
pmap->pm_flags = 0;
pmap->pm_count = 1;
pmap->pm_ptphint = NULL;
pmap->pm_active = 0;
for (i = 0; i < MAXCPU; i++) {
pmap->pm_asn[i].asn = 0;
pmap->pm_asn[i].gen = 0;
}
TAILQ_INIT(&pmap->pm_pvlist);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
}
/*
* Wire in kernel global address entries. To avoid a race condition
* between pmap initialization and pmap_growkernel, this procedure
* should be called after the vmspace is attached to the process
* but before this pmap is activated.
*/
void
pmap_pinit2(pmap)
struct pmap *pmap;
{
bcopy(PTlev1 + K1SEGLEV1I, pmap->pm_lev1 + K1SEGLEV1I, nklev2 * PTESIZE);
}
static int
pmap_release_free_page(pmap_t pmap, vm_page_t p)
{
pt_entry_t* pte;
pt_entry_t* l2map;
if (p->pindex >= NUSERLEV3MAPS + NUSERLEV2MAPS)
/* level 1 page table */
pte = &pmap->pm_lev1[PTLEV1I];
else if (p->pindex >= NUSERLEV3MAPS)
/* level 2 page table */
pte = &pmap->pm_lev1[p->pindex - NUSERLEV3MAPS];
else {
/* level 3 page table */
pte = &pmap->pm_lev1[p->pindex >> ALPHA_PTSHIFT];
l2map = (pt_entry_t*) ALPHA_PHYS_TO_K0SEG(pmap_pte_pa(pte));
pte = &l2map[p->pindex & ((1 << ALPHA_PTSHIFT) - 1)];
}
/*
* This code optimizes the case of freeing non-busy
* page-table pages. Those pages are zero now, and
* might as well be placed directly into the zero queue.
*/
if (vm_page_sleep_busy(p, FALSE, "pmaprl"))
return 0;
vm_page_busy(p);
/*
* Remove the page table page from the processes address space.
*/
*pte = 0;
pmap->pm_stats.resident_count--;
#ifdef PMAP_DEBUG
if (p->hold_count) {
panic("pmap_release: freeing held page table page");
}
#endif
/*
* Level1 pages need to have the kernel
* stuff cleared, so they can go into the zero queue also.
*/
if (p->pindex == NUSERLEV3MAPS + NUSERLEV2MAPS)
bzero(pmap->pm_lev1 + K1SEGLEV1I, nklev2 * PTESIZE);
if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex))
pmap->pm_ptphint = NULL;
#ifdef PMAP_DEBUG
{
u_long *lp = (u_long*) ALPHA_PHYS_TO_K0SEG(VM_PAGE_TO_PHYS(p));
u_long *ep = (u_long*) ((char*) lp + PAGE_SIZE);
for (; lp < ep; lp++)
if (*lp != 0)
panic("pmap_release_free_page: page not zero");
}
#endif
p->wire_count--;
cnt.v_wire_count--;
vm_page_free_zero(p);
return 1;
}
/*
* this routine is called if the page table page is not
* mapped correctly.
*/
static vm_page_t
_pmap_allocpte(pmap, ptepindex)
pmap_t pmap;
unsigned ptepindex;
{
pt_entry_t* pte;
vm_offset_t ptepa;
vm_page_t m;
/*
* Find or fabricate a new pagetable page
*/
m = vm_page_grab(pmap->pm_pteobj, ptepindex,
VM_ALLOC_ZERO | VM_ALLOC_RETRY);
KASSERT(m->queue == PQ_NONE,
("_pmap_allocpte: %p->queue != PQ_NONE", m));
if (m->wire_count == 0)
cnt.v_wire_count++;
m->wire_count++;
/*
* Increment the hold count for the page table page
* (denoting a new mapping.)
*/
m->hold_count++;
/*
* Map the pagetable page into the process address space, if
* it isn't already there.
*/
pmap->pm_stats.resident_count++;
ptepa = VM_PAGE_TO_PHYS(m);
if (ptepindex >= NUSERLEV3MAPS) {
pte = &pmap->pm_lev1[ptepindex - NUSERLEV3MAPS];
} else {
int l1index = ptepindex >> ALPHA_PTSHIFT;
pt_entry_t* l1pte = &pmap->pm_lev1[l1index];
pt_entry_t* l2map;
if (!pmap_pte_v(l1pte))
_pmap_allocpte(pmap, NUSERLEV3MAPS + l1index);
else {
vm_page_t l2page =
pmap_page_lookup(pmap->pm_pteobj,
NUSERLEV3MAPS + l1index);
l2page->hold_count++;
}
l2map = (pt_entry_t*) ALPHA_PHYS_TO_K0SEG(pmap_pte_pa(l1pte));
pte = &l2map[ptepindex & ((1 << ALPHA_PTSHIFT) - 1)];
}
*pte = pmap_phys_to_pte(ptepa) | PG_KRE | PG_KWE | PG_V;
/*
* Set the page table hint
*/
pmap->pm_ptphint = m;
if ((m->flags & PG_ZERO) == 0)
bzero((caddr_t) ALPHA_PHYS_TO_K0SEG(ptepa), PAGE_SIZE);
m->valid = VM_PAGE_BITS_ALL;
vm_page_flag_clear(m, PG_ZERO | PG_BUSY);
vm_page_flag_set(m, PG_MAPPED);
return m;
}
static vm_page_t
pmap_allocpte(pmap_t pmap, vm_offset_t va)
{
unsigned ptepindex;
pt_entry_t* lev2pte;
vm_page_t m;
/*
* Calculate pagetable page index
*/
ptepindex = va >> (PAGE_SHIFT + ALPHA_PTSHIFT);
/*
* Get the level2 entry
*/
lev2pte = pmap_lev2pte(pmap, va);
/*
* If the page table page is mapped, we just increment the
* hold count, and activate it.
*/
if (lev2pte && pmap_pte_v(lev2pte)) {
/*
* In order to get the page table page, try the
* hint first.
*/
if (pmap->pm_ptphint &&
(pmap->pm_ptphint->pindex == ptepindex)) {
m = pmap->pm_ptphint;
} else {
m = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
pmap->pm_ptphint = m;
}
m->hold_count++;
return m;
}
/*
* Here if the pte page isn't mapped, or if it has been deallocated.
*/
return _pmap_allocpte(pmap, ptepindex);
}
/***************************************************
* Pmap allocation/deallocation routines.
***************************************************/
/*
* Release any resources held by the given physical map.
* Called when a pmap initialized by pmap_pinit is being released.
* Should only be called if the map contains no valid mappings.
*/
void
pmap_release(pmap_t pmap)
{
vm_page_t p,n,lev1pg;
vm_object_t object = pmap->pm_pteobj;
int curgeneration;
#if defined(DIAGNOSTIC)
if (object->ref_count != 1)
panic("pmap_release: pteobj reference count != 1");
#endif
lev1pg = NULL;
retry:
curgeneration = object->generation;
for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
n = TAILQ_NEXT(p, listq);
if (p->pindex >= NUSERLEV3MAPS) {
continue;
}
while (1) {
if (!pmap_release_free_page(pmap, p) &&
(object->generation != curgeneration))
goto retry;
}
}
for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
n = TAILQ_NEXT(p, listq);
if (p->pindex < NUSERLEV3MAPS) {
/* can this happen? maybe panic */
goto retry;
}
if (p->pindex >= NUSERLEV3MAPS + NUSERLEV2MAPS) {
lev1pg = p;
continue;
}
while (1) {
if (!pmap_release_free_page(pmap, p) &&
(object->generation != curgeneration))
goto retry;
}
}
if (lev1pg && !pmap_release_free_page(pmap, lev1pg))
goto retry;
}
/*
* grow the number of kernel page table entries, if needed
*/
void
pmap_growkernel(vm_offset_t addr)
{
/* XXX come back to this */
struct proc *p;
struct pmap *pmap;
int s;
pt_entry_t* pte;
pt_entry_t newlev1, newlev2;
vm_offset_t pa;
vm_page_t nkpg;
s = splhigh();
if (kernel_vm_end == 0) {
kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
/* Count the level 2 page tables */
nklev2 = 0;
nklev3 = 0;
while (pmap_pte_v(pmap_lev1pte(kernel_pmap, kernel_vm_end))) {
nklev2++;
nklev3 += (1L << ALPHA_PTSHIFT);
kernel_vm_end += ALPHA_L1SIZE;
}
/* Count the level 3 page tables in the last level 2 page table */
kernel_vm_end -= ALPHA_L1SIZE;
nklev3 -= (1 << ALPHA_PTSHIFT);
while (pmap_pte_v(pmap_lev2pte(kernel_pmap, kernel_vm_end))) {
nklev3++;
kernel_vm_end += ALPHA_L2SIZE;
}
}
addr = (addr + ALPHA_L2SIZE) & ~(ALPHA_L2SIZE - 1);
while (kernel_vm_end < addr) {
/*
* If the level 1 pte is invalid, allocate a new level 2 page table
*/
pte = pmap_lev1pte(kernel_pmap, kernel_vm_end);
if (!pmap_pte_v(pte)) {
int pindex = NKLEV3MAPS + pmap_lev1_index(kernel_vm_end) - K1SEGLEV1I;
nkpg = vm_page_alloc(kptobj, pindex, VM_ALLOC_SYSTEM);
if (!nkpg)
panic("pmap_growkernel: no memory to grow kernel");
printf("pmap_growkernel: growing to %lx\n", addr);
printf("pmap_growkernel: adding new level2 page table\n");
nklev2++;
vm_page_wire(nkpg);
pa = VM_PAGE_TO_PHYS(nkpg);
pmap_zero_page(pa);
newlev1 = pmap_phys_to_pte(pa)
| PG_V | PG_ASM | PG_KRE | PG_KWE;
LIST_FOREACH(p, &allproc, p_list) {
if (p->p_vmspace) {
pmap = vmspace_pmap(p->p_vmspace);
*pmap_lev1pte(pmap, kernel_vm_end) = newlev1;
}
}
*pte = newlev1;
pmap_invalidate_all(kernel_pmap);
}
/*
* If the level 2 pte is invalid, allocate a new level 3 page table
*/
pte = pmap_lev2pte(kernel_pmap, kernel_vm_end);
if (pmap_pte_v(pte)) {
kernel_vm_end = (kernel_vm_end + ALPHA_L2SIZE) & ~(ALPHA_L2SIZE - 1);
continue;
}
/*
* This index is bogus, but out of the way
*/
nkpg = vm_page_alloc(kptobj, nklev3, VM_ALLOC_SYSTEM);
if (!nkpg)
panic("pmap_growkernel: no memory to grow kernel");
nklev3++;
vm_page_wire(nkpg);
pa = VM_PAGE_TO_PHYS(nkpg);
pmap_zero_page(pa);
newlev2 = pmap_phys_to_pte(pa) | PG_V | PG_ASM | PG_KRE | PG_KWE;
*pte = newlev2;
kernel_vm_end = (kernel_vm_end + ALPHA_L2SIZE) & ~(ALPHA_L2SIZE - 1);
}
splx(s);
}
/*
* Retire the given physical map from service.
* Should only be called if the map contains
* no valid mappings.
*/
void
pmap_destroy(pmap_t pmap)
{
int count;
if (pmap == NULL)
return;
count = --pmap->pm_count;
if (count == 0) {
pmap_release(pmap);
panic("destroying a pmap is not yet implemented");
}
}
/*
* Add a reference to the specified pmap.
*/
void
pmap_reference(pmap_t pmap)
{
if (pmap != NULL) {
pmap->pm_count++;
}
}
/***************************************************
* page management routines.
***************************************************/
/*
* free the pv_entry back to the free list
*/
static PMAP_INLINE void
free_pv_entry(pv_entry_t pv)
{
pv_entry_count--;
zfreei(pvzone, pv);
}
/*
* get a new pv_entry, allocating a block from the system
* when needed.
* the memory allocation is performed bypassing the malloc code
* because of the possibility of allocations at interrupt time.
*/
static pv_entry_t
get_pv_entry(void)
{
pv_entry_count++;
if (pv_entry_high_water &&
(pv_entry_count > pv_entry_high_water) &&
(pmap_pagedaemon_waken == 0)) {
pmap_pagedaemon_waken = 1;
wakeup (&vm_pages_needed);
}
return zalloci(pvzone);
}
/*
* This routine is very drastic, but can save the system
* in a pinch.
*/
void
pmap_collect()
{
int i;
vm_page_t m;
static int warningdone=0;
if (pmap_pagedaemon_waken == 0)
return;
if (warningdone < 5) {
printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
warningdone++;
}
for(i = 0; i < vm_page_array_size; i++) {
m = &vm_page_array[i];
if (m->wire_count || m->hold_count || m->busy ||
(m->flags & PG_BUSY))
continue;
pmap_remove_all(m);
}
pmap_pagedaemon_waken = 0;
}
/*
* If it is the first entry on the list, it is actually
* in the header and we must copy the following entry up
* to the header. Otherwise we must search the list for
* the entry. In either case we free the now unused entry.
*/
static int
pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
{
pv_entry_t pv;
int rtval;
int s;
s = splvm();
if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
for (pv = TAILQ_FIRST(&m->md.pv_list);
pv;
pv = TAILQ_NEXT(pv, pv_list)) {
if (pmap == pv->pv_pmap && va == pv->pv_va)
break;
}
} else {
for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
pv;
pv = TAILQ_NEXT(pv, pv_plist)) {
if (va == pv->pv_va)
break;
}
}
rtval = 0;
if (pv) {
rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
m->md.pv_list_count--;
if (TAILQ_FIRST(&m->md.pv_list) == NULL)
vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
free_pv_entry(pv);
}
splx(s);
return rtval;
}
/*
* Create a pv entry for page at pa for
* (pmap, va).
*/
static void
pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
{
int s;
pv_entry_t pv;
s = splvm();
pv = get_pv_entry();
pv->pv_va = va;
pv->pv_pmap = pmap;
pv->pv_ptem = mpte;
TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
m->md.pv_list_count++;
splx(s);
}
/*
* pmap_remove_pte: do the things to unmap a page in a process
*/
static int
pmap_remove_pte(pmap_t pmap, pt_entry_t* ptq, vm_offset_t va)
{
pt_entry_t oldpte;
vm_page_t m;
oldpte = *ptq;
PMAP_DEBUG_VA(va);
*ptq = 0;
if (oldpte & PG_W)
pmap->pm_stats.wired_count -= 1;
pmap->pm_stats.resident_count -= 1;
if (oldpte & PG_MANAGED) {
m = PHYS_TO_VM_PAGE(pmap_pte_pa(&oldpte));
return pmap_remove_entry(pmap, m, va);
} else {
return pmap_unuse_pt(pmap, va, NULL);
}
return 0;
}
/*
* Remove a single page from a process address space
*/
static void
pmap_remove_page(pmap_t pmap, vm_offset_t va)
{
register pt_entry_t *ptq;
ptq = pmap_lev3pte(pmap, va);
/*
* if there is no pte for this address, just skip it!!!
*/
if (!ptq || !pmap_pte_v(ptq))
return;
/*
* get a local va for mappings for this pmap.
*/
(void) pmap_remove_pte(pmap, ptq, va);
pmap_invalidate_page(pmap, va);
return;
}
/*
* Remove the given range of addresses from the specified map.
*
* It is assumed that the start and end are properly
* rounded to the page size.
*/
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
vm_offset_t va, nva;
if (pmap == NULL)
return;
if (pmap->pm_stats.resident_count == 0)
return;
/*
* Special handling for removing a single page: this is a very
* common operation and it is easy to short-circuit some
* code.
*/
if (sva + PAGE_SIZE == eva) {
pmap_remove_page(pmap, sva);
return;
}
for (va = sva; va < eva; va = nva) {
if (!pmap_pte_v(pmap_lev1pte(pmap, va))) {
nva = alpha_l1trunc(va + ALPHA_L1SIZE);
continue;
}
if (!pmap_pte_v(pmap_lev2pte(pmap, va))) {
nva = alpha_l2trunc(va + ALPHA_L2SIZE);
continue;
}
pmap_remove_page(pmap, va);
nva = va + PAGE_SIZE;
}
}
/*
* Routine: pmap_remove_all
* Function:
* Removes this physical page from
* all physical maps in which it resides.
* Reflects back modify bits to the pager.
*
* Notes:
* Original versions of this routine were very
* inefficient because they iteratively called
* pmap_remove (slow...)
*/
static void
pmap_remove_all(vm_page_t m)
{
register pv_entry_t pv;
pt_entry_t *pte, tpte;
int nmodify;
int s;
nmodify = 0;
#if defined(PMAP_DIAGNOSTIC)
/*
* XXX this makes pmap_page_protect(NONE) illegal for non-managed
* pages!
*/
if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
}
#endif
s = splvm();
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
pte = pmap_lev3pte(pv->pv_pmap, pv->pv_va);
pv->pv_pmap->pm_stats.resident_count--;
if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m))
panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
tpte = *pte;
PMAP_DEBUG_VA(pv->pv_va);
*pte = 0;
if (tpte & PG_W)
pv->pv_pmap->pm_stats.wired_count--;
pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
m->md.pv_list_count--;
pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
free_pv_entry(pv);
}
vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
splx(s);
return;
}
/*
* Set the physical protection on the
* specified range of this map as requested.
*/
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
pt_entry_t* pte;
int newprot;
if (pmap == NULL)
return;
if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
pmap_remove(pmap, sva, eva);
return;
}
if (prot & VM_PROT_WRITE)
return;
newprot = pte_prot(pmap, prot);
if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
panic("pmap_protect: unaligned addresses");
while (sva < eva) {
/*
* If level 1 pte is invalid, skip this segment
*/
pte = pmap_lev1pte(pmap, sva);
if (!pmap_pte_v(pte)) {
sva = alpha_l1trunc(sva) + ALPHA_L1SIZE;
continue;
}
/*
* If level 2 pte is invalid, skip this segment
*/
pte = pmap_lev2pte(pmap, sva);
if (!pmap_pte_v(pte)) {
sva = alpha_l2trunc(sva) + ALPHA_L2SIZE;
continue;
}
/*
* If level 3 pte is invalid, skip this page
*/
pte = pmap_lev3pte(pmap, sva);
if (!pmap_pte_v(pte)) {
sva += PAGE_SIZE;
continue;
}
if (pmap_pte_prot(pte) != newprot) {
pmap_pte_set_prot(pte, newprot);
pmap_invalidate_page(pmap, sva);
}
sva += PAGE_SIZE;
}
}
/*
* Insert the given physical page (p) at
* the specified virtual address (v) in the
* target physical map with the protection requested.
*
* If specified, the page will be wired down, meaning
* that the related pte can not be reclaimed.
*
* NB: This is the only routine which MAY NOT lazy-evaluate
* or lose information. That is, this routine must actually
* insert this page into the given map NOW.
*/
void
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
boolean_t wired)
{
vm_offset_t pa;
pt_entry_t *pte;
vm_offset_t opa;
pt_entry_t origpte, newpte;
vm_page_t mpte;
int managed;
if (pmap == NULL)
return;
va &= ~PAGE_MASK;
#ifdef PMAP_DIAGNOSTIC
if (va > VM_MAX_KERNEL_ADDRESS)
panic("pmap_enter: toobig");
#endif
mpte = NULL;
/*
* In the case that a page table page is not
* resident, we are creating it here.
*/
if (va < VM_MAXUSER_ADDRESS) {
mpte = pmap_allocpte(pmap, va);
}
pte = pmap_lev3pte(pmap, va);
/*
* Page Directory table entry not valid, we need a new PT page
*/
if (pte == NULL) {
panic("pmap_enter: invalid kernel page tables pmap=%p, va=0x%lx\n", pmap, va);
}
origpte = *pte;
pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK;
managed = 0;
opa = pmap_pte_pa(pte);
/*
* Mapping has not changed, must be protection or wiring change.
*/
if (origpte && (opa == pa)) {
/*
* Wiring change, just update stats. We don't worry about
* wiring PT pages as they remain resident as long as there
* are valid mappings in them. Hence, if a user page is wired,
* the PT page will be also.
*/
if (wired && ((origpte & PG_W) == 0))
pmap->pm_stats.wired_count++;
else if (!wired && (origpte & PG_W))
pmap->pm_stats.wired_count--;
/*
* Remove extra pte reference
*/
if (mpte)
mpte->hold_count--;
managed = origpte & PG_MANAGED;
goto validate;
}
/*
* Mapping has changed, invalidate old range and fall through to
* handle validating new mapping.
*/
if (opa) {
int err;
err = pmap_remove_pte(pmap, pte, va);
if (err)
panic("pmap_enter: pte vanished, va: 0x%lx", va);
}
/*
* Enter on the PV list if part of our managed memory. Note that we
* raise IPL while manipulating pv_table since pmap_enter can be
* called at interrupt time.
*/
if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
pmap_insert_entry(pmap, va, mpte, m);
managed |= PG_MANAGED;
}
/*
* Increment counters
*/
pmap->pm_stats.resident_count++;
if (wired)
pmap->pm_stats.wired_count++;
validate:
/*
* Now validate mapping with desired protection/wiring.
*/
newpte = pmap_phys_to_pte(pa) | pte_prot(pmap, prot) | PG_V | managed;
if (managed) {
vm_page_t om;
/*
* Set up referenced/modified emulation for the new mapping
*/
om = PHYS_TO_VM_PAGE(pa);
if ((om->md.pv_flags & PV_TABLE_REF) == 0)
newpte |= PG_FOR | PG_FOW | PG_FOE;
else if ((om->md.pv_flags & PV_TABLE_MOD) == 0)
newpte |= PG_FOW;
}
if (wired)
newpte |= PG_W;
/*
* if the mapping or permission bits are different, we need
* to update the pte.
*/
if (origpte != newpte) {
PMAP_DEBUG_VA(va);
*pte = newpte;
if (origpte)
pmap_invalidate_page(pmap, va);
if (prot & VM_PROT_EXECUTE)
alpha_pal_imb();
}
}
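/*
 * Sketch (illustrative) of the referenced/modified emulation set up in
 * pmap_enter() above: the fault-on bits make the first access (PG_FOR/
 * PG_FOE) and the first write (PG_FOW) trap, and the fault handler is
 * assumed to record the event in md.pv_flags and strip the bits so the
 * access can be restarted.  The handler name below is hypothetical and
 * it assumes a valid level 3 mapping; only the bit bookkeeping mirrors
 * this file (pmap_ts_referenced() and pmap_clear_modify() below perform
 * the reverse operation).
 */
#if 0
static void
pmap_fault_on_sketch(pmap_t pmap, vm_offset_t va, int write)
{
	pt_entry_t *pte = pmap_lev3pte(pmap, va);
	vm_page_t m = PHYS_TO_VM_PAGE(pmap_pte_pa(pte));

	m->md.pv_flags |= PV_TABLE_REF;
	*pte &= ~(PG_FOR | PG_FOE);
	if (write) {
		m->md.pv_flags |= PV_TABLE_MOD;
		*pte &= ~PG_FOW;
	}
	pmap_invalidate_page(pmap, va);
}
#endif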
/*
* this code makes some *MAJOR* assumptions:
* 1. Current pmap & pmap exists.
* 2. Not wired.
* 3. Read access.
* 4. No page table pages.
* 5. Tlbflush is deferred to calling procedure.
* 6. Page IS managed.
* but is *MUCH* faster than pmap_enter...
*/
static vm_page_t
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
{
register pt_entry_t *pte;
/*
* In the case that a page table page is not
* resident, we are creating it here.
*/
if (va < VM_MAXUSER_ADDRESS) {
unsigned ptepindex;
pt_entry_t* l2pte;
/*
* Calculate lev2 page index
*/
ptepindex = va >> ALPHA_L2SHIFT;
if (mpte && (mpte->pindex == ptepindex)) {
mpte->hold_count++;
} else {
retry:
/*
* Get the level 2 entry
*/
l2pte = pmap_lev2pte(pmap, va);
/*
* If the level 2 page table is mapped, we just increment
* the hold count, and activate it.
*/
if (l2pte && pmap_pte_v(l2pte)) {
if (pmap->pm_ptphint &&
(pmap->pm_ptphint->pindex == ptepindex)) {
mpte = pmap->pm_ptphint;
} else {
mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
pmap->pm_ptphint = mpte;
}
if (mpte == NULL)
goto retry;
mpte->hold_count++;
} else {
mpte = _pmap_allocpte(pmap, ptepindex);
}
}
} else {
mpte = NULL;
}
/*
* This call to vtopte makes the assumption that we are
* entering the page into the current pmap. In order to support
* quick entry into any pmap, one would likely use pmap_pte_quick.
* But that isn't as quick as vtopte.
*/
pte = vtopte(va);
if (*pte) {
if (mpte)
pmap_unwire_pte_hold(pmap, va, mpte);
alpha_pal_imb(); /* XXX overkill? */
return 0;
}
/*
* Enter on the PV list if part of our managed memory. Note that we
* raise IPL while manipulating pv_table since pmap_enter can be
* called at interrupt time.
*/
PMAP_DEBUG_VA(va);
pmap_insert_entry(pmap, va, mpte, m);
/*
* Increment counters
*/
pmap->pm_stats.resident_count++;
/*
* Now validate mapping with RO protection
*/
*pte = pmap_phys_to_pte(VM_PAGE_TO_PHYS(m)) | PG_V | PG_KRE | PG_URE | PG_MANAGED;
alpha_pal_imb(); /* XXX overkill? */
return mpte;
}
/*
* Make temporary mapping for a physical address. This is called
* during dump.
*/
void *
pmap_kenter_temporary(vm_offset_t pa, int i)
{
return (void *) ALPHA_PHYS_TO_K0SEG(pa - (i * PAGE_SIZE));
}
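/*
 * Usage sketch (illustrative): pmap_kenter_temporary() gives the dump
 * path a writable window onto physical memory one page at a time; on
 * alpha this is trivial because K0SEG already maps all of physical
 * memory.  The caller below is hypothetical, not the actual
 * machine-dependent dump code.
 */
#if 0
static void
dump_page_sketch(vm_offset_t pa, int (*writeblk)(void *, size_t))
{
	void *va;

	/* i == 0: map just this one page */
	va = pmap_kenter_temporary(pa, 0);
	(*writeblk)(va, PAGE_SIZE);
}
#endif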
#define MAX_INIT_PT (96)
/*
* pmap_object_init_pt preloads the ptes for a given object
* into the specified pmap. This eliminates the blast of soft
* faults on process startup and immediately after an mmap.
*/
void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
vm_object_t object, vm_pindex_t pindex,
vm_size_t size, int limit)
{
vm_offset_t tmpidx;
int psize;
vm_page_t p, mpte;
int objpgs;
if (pmap == NULL || object == NULL)
return;
psize = alpha_btop(size);
if ((object->type != OBJT_VNODE) ||
(limit && (psize > MAX_INIT_PT) &&
(object->resident_page_count > MAX_INIT_PT))) {
return;
}
if (psize + pindex > object->size)
psize = object->size - pindex;
mpte = NULL;
/*
* if we are processing a major portion of the object, then scan the
* entire thing.
*/
if (psize > (object->resident_page_count >> 2)) {
objpgs = psize;
for (p = TAILQ_FIRST(&object->memq);
((objpgs > 0) && (p != NULL));
p = TAILQ_NEXT(p, listq)) {
tmpidx = p->pindex;
if (tmpidx < pindex) {
continue;
}
tmpidx -= pindex;
if (tmpidx >= psize) {
continue;
}
if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
(p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
if ((p->queue - p->pc) == PQ_CACHE)
vm_page_deactivate(p);
vm_page_busy(p);
mpte = pmap_enter_quick(pmap,
addr + alpha_ptob(tmpidx), p, mpte);
vm_page_flag_set(p, PG_MAPPED);
vm_page_wakeup(p);
}
objpgs -= 1;
}
} else {
/*
* else lookup the pages one-by-one.
*/
for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
p = vm_page_lookup(object, tmpidx + pindex);
if (p &&
((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
(p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
if ((p->queue - p->pc) == PQ_CACHE)
vm_page_deactivate(p);
vm_page_busy(p);
mpte = pmap_enter_quick(pmap,
addr + alpha_ptob(tmpidx), p, mpte);
vm_page_flag_set(p, PG_MAPPED);
vm_page_wakeup(p);
}
}
}
return;
}
/*
* pmap_prefault provides a quick way of clustering
* pagefaults into a process's address space. It is a "cousin"
* of pmap_object_init_pt, except it runs at page fault time instead
* of mmap time.
*/
#define PFBAK 4
#define PFFOR 4
#define PAGEORDER_SIZE (PFBAK+PFFOR)
static int pmap_prefault_pageorder[] = {
-PAGE_SIZE, PAGE_SIZE,
-2 * PAGE_SIZE, 2 * PAGE_SIZE,
-3 * PAGE_SIZE, 3 * PAGE_SIZE,
-4 * PAGE_SIZE, 4 * PAGE_SIZE
};
void
pmap_prefault(pmap, addra, entry)
pmap_t pmap;
vm_offset_t addra;
vm_map_entry_t entry;
{
int i;
vm_offset_t starta;
vm_offset_t addr;
vm_pindex_t pindex;
vm_page_t m, mpte;
vm_object_t object;
if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace)))
return;
object = entry->object.vm_object;
starta = addra - PFBAK * PAGE_SIZE;
if (starta < entry->start) {
starta = entry->start;
} else if (starta > addra) {
starta = 0;
}
mpte = NULL;
for (i = 0; i < PAGEORDER_SIZE; i++) {
vm_object_t lobject;
pt_entry_t *pte;
addr = addra + pmap_prefault_pageorder[i];
if (addr > addra + (PFFOR * PAGE_SIZE))
addr = 0;
if (addr < starta || addr >= entry->end)
continue;
if (!pmap_pte_v(pmap_lev1pte(pmap, addr))
|| !pmap_pte_v(pmap_lev2pte(pmap, addr)))
continue;
pte = vtopte(addr);
if (*pte)
continue;
pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
lobject = object;
for (m = vm_page_lookup(lobject, pindex);
(!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
lobject = lobject->backing_object) {
if (lobject->backing_object_offset & PAGE_MASK)
break;
pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
m = vm_page_lookup(lobject->backing_object, pindex);
}
/*
* give-up when a page is not in memory
*/
if (m == NULL)
break;
if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
(m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
if ((m->queue - m->pc) == PQ_CACHE) {
vm_page_deactivate(m);
}
vm_page_busy(m);
mpte = pmap_enter_quick(pmap, addr, m, mpte);
vm_page_flag_set(m, PG_MAPPED);
vm_page_wakeup(m);
}
}
}
/*
* Routine: pmap_change_wiring
* Function: Change the wiring attribute for a map/virtual-address
* pair.
* In/out conditions:
* The mapping must already exist in the pmap.
*/
void
pmap_change_wiring(pmap, va, wired)
register pmap_t pmap;
vm_offset_t va;
boolean_t wired;
{
pt_entry_t *pte;
if (pmap == NULL)
return;
pte = pmap_lev3pte(pmap, va);
if (wired && !pmap_pte_w(pte))
pmap->pm_stats.wired_count++;
else if (!wired && pmap_pte_w(pte))
pmap->pm_stats.wired_count--;
/*
* Wiring is not a hardware characteristic so there is no need to
* invalidate TLB.
*/
pmap_pte_set_w(pte, wired);
}
/*
* Copy the range specified by src_addr/len
* from the source map to the range dst_addr/len
* in the destination map.
*
* This routine is only advisory and need not do anything.
*/
void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
vm_offset_t src_addr)
{
}
/*
* Routine: pmap_kernel
* Function:
* Returns the physical map handle for the kernel.
*/
pmap_t
pmap_kernel()
{
return (kernel_pmap);
}
/*
* pmap_zero_page zeros the specified hardware page by
* mapping it into virtual memory and using bzero to clear
* its contents.
*/
void
pmap_zero_page(vm_offset_t pa)
{
vm_offset_t va = ALPHA_PHYS_TO_K0SEG(pa);
bzero((caddr_t) va, PAGE_SIZE);
}
/*
* pmap_zero_page_area zeros the specified hardware page by
* mapping it into virtual memory and using bzero to clear
* its contents.
*
* off and size must reside within a single page.
*/
void
pmap_zero_page_area(vm_offset_t pa, int off, int size)
{
vm_offset_t va = ALPHA_PHYS_TO_K0SEG(pa);
bzero((char *)(caddr_t)va + off, size);
}
/*
* pmap_copy_page copies the specified (machine independent)
* page by mapping the page into virtual memory and using
* bcopy to copy the page, one machine dependent page at a
* time.
*/
void
pmap_copy_page(vm_offset_t src, vm_offset_t dst)
{
src = ALPHA_PHYS_TO_K0SEG(src);
dst = ALPHA_PHYS_TO_K0SEG(dst);
bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
}
/*
* Routine: pmap_pageable
* Function:
* Make the specified pages (by pmap, offset)
* pageable (or not) as requested.
*
* A page which is not pageable may not take
* a fault; therefore, its page table entry
* must remain valid for the duration.
*
* This routine is merely advisory; pmap_enter
* will specify that these pages are to be wired
* down (or not) as appropriate.
*/
void
pmap_pageable(pmap, sva, eva, pageable)
pmap_t pmap;
vm_offset_t sva, eva;
boolean_t pageable;
{
}
/*
* this routine returns true if a physical page resides
* in the given pmap.
*/
boolean_t
pmap_page_exists(pmap, m)
pmap_t pmap;
vm_page_t m;
{
register pv_entry_t pv;
int s;
if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
return FALSE;
s = splvm();
/*
* Not found, check current mappings returning immediately if found.
*/
for (pv = TAILQ_FIRST(&m->md.pv_list);
pv;
pv = TAILQ_NEXT(pv, pv_list)) {
if (pv->pv_pmap == pmap) {
splx(s);
return TRUE;
}
}
splx(s);
return (FALSE);
}
#define PMAP_REMOVE_PAGES_CURPROC_ONLY
/*
* Remove all pages from specified address space
* this aids process exit speeds. Also, this code
* is special cased for current process only, but
* can have the more generic (and slightly slower)
* mode enabled. This is much faster than pmap_remove
* in the case of running down an entire address space.
*/
void
pmap_remove_pages(pmap, sva, eva)
pmap_t pmap;
vm_offset_t sva, eva;
{
pt_entry_t *pte, tpte;
vm_page_t m;
pv_entry_t pv, npv;
int s;
#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) {
printf("warning: pmap_remove_pages called with non-current pmap\n");
return;
}
#endif
s = splvm();
for(pv = TAILQ_FIRST(&pmap->pm_pvlist);
pv;
pv = npv) {
if (pv->pv_va >= eva || pv->pv_va < sva) {
npv = TAILQ_NEXT(pv, pv_plist);
continue;
}
#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
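		/*
		 * vtopte() is only valid for the currently active
		 * address space, which is why non-current pmaps were
		 * rejected above.
		 */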
pte = vtopte(pv->pv_va);
#else
pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
#endif
if (!pmap_pte_v(pte))
panic("pmap_remove_pages: page on pm_pvlist has no pte\n");
tpte = *pte;
/*
* We cannot remove wired pages from a process' mapping at this time
*/
if (tpte & PG_W) {
npv = TAILQ_NEXT(pv, pv_plist);
continue;
}
PMAP_DEBUG_VA(pv->pv_va);
*pte = 0;
m = PHYS_TO_VM_PAGE(pmap_pte_pa(&tpte));
pv->pv_pmap->pm_stats.resident_count--;
npv = TAILQ_NEXT(pv, pv_plist);
TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
m->md.pv_list_count--;
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
if (TAILQ_FIRST(&m->md.pv_list) == NULL) {
vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
}
pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
free_pv_entry(pv);
}
splx(s);
pmap_invalidate_all(pmap);
}
/*
* this routine is used to modify bits in ptes
*/
static void
pmap_changebit(vm_page_t m, int bit, boolean_t setem)
{
pv_entry_t pv;
pt_entry_t *pte;
int changed;
int s;
if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
return;
s = splvm();
changed = 0;
	/*
	 * Loop over all current mappings, setting or clearing the bit
	 * as appropriate.  If setting the page read-only, do we need
	 * to clear the VAC?
	 */
for (pv = TAILQ_FIRST(&m->md.pv_list);
pv;
pv = TAILQ_NEXT(pv, pv_list)) {
/*
* don't write protect pager mappings
*/
if (!setem && bit == (PG_UWE|PG_KWE)) {
if (!pmap_track_modified(pv->pv_va))
continue;
}
#if defined(PMAP_DIAGNOSTIC)
if (!pv->pv_pmap) {
printf("Null pmap (cb) at va: 0x%lx\n", pv->pv_va);
continue;
}
#endif
pte = pmap_lev3pte(pv->pv_pmap, pv->pv_va);
changed = 0;
if (setem) {
*pte |= bit;
changed = 1;
} else {
pt_entry_t pbits = *pte;
if (pbits & bit) {
changed = 1;
*pte = pbits & ~bit;
}
}
if (changed)
pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
}
splx(s);
}
/*
* pmap_page_protect:
*
* Lower the permission for all mappings to a given page.
*/
void
pmap_page_protect(vm_page_t m, vm_prot_t prot)
{
if ((prot & VM_PROT_WRITE) == 0) {
if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
pmap_changebit(m, PG_KWE|PG_UWE, FALSE);
} else {
pmap_remove_all(m);
}
}
}
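/*
 *	pmap_phys_address:
 *
 *	Return the physical address corresponding to the given
 *	physical page number.
 */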
vm_offset_t
pmap_phys_address(ppn)
int ppn;
{
return (alpha_ptob(ppn));
}
/*
* pmap_ts_referenced:
*
* Return the count of reference bits for a page, clearing all of them.
*
*/
int
pmap_ts_referenced(vm_page_t m)
{
if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
return 0;
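	/*
	 * The Alpha has no hardware-maintained reference bit; it is
	 * emulated with the fault-on-read/execute/write PTE bits.
	 * Re-arming those bits is what "clears" the reference state.
	 */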
if (m->md.pv_flags & PV_TABLE_REF) {
pmap_changebit(m, PG_FOR|PG_FOE|PG_FOW, TRUE);
m->md.pv_flags &= ~PV_TABLE_REF;
return 1;
}
return 0;
}
/*
* pmap_is_modified:
*
* Return whether or not the specified physical page was modified
* in any physical maps.
*/
boolean_t
pmap_is_modified(vm_page_t m)
{
if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
return FALSE;
return (m->md.pv_flags & PV_TABLE_MOD) != 0;
}
/*
* Clear the modify bits on the specified physical page.
*/
void
pmap_clear_modify(vm_page_t m)
{
if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
return;
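	/*
	 * Setting PG_FOW again re-arms the fault-on-write trap, so the
	 * next write to this page is noticed and re-marks the page as
	 * modified.
	 */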
if (m->md.pv_flags & PV_TABLE_MOD) {
pmap_changebit(m, PG_FOW, TRUE);
m->md.pv_flags &= ~PV_TABLE_MOD;
}
}
/*
* pmap_page_is_free:
*
* Called when a page is freed to allow pmap to clean up
* any extra state associated with the page. In this case
* clear modified/referenced bits.
*/
void
pmap_page_is_free(vm_page_t m)
{
m->md.pv_flags = 0;
}
/*
* pmap_clear_reference:
*
* Clear the reference bit on the specified physical page.
*/
void
pmap_clear_reference(vm_page_t m)
{
if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
return;
if (m->md.pv_flags & PV_TABLE_REF) {
pmap_changebit(m, PG_FOR|PG_FOE|PG_FOW, TRUE);
m->md.pv_flags &= ~PV_TABLE_REF;
}
}
/*
* pmap_emulate_reference:
*
* Emulate reference and/or modified bit hits.
* From NetBSD
*/
void
pmap_emulate_reference(struct proc *p, vm_offset_t v, int user, int write)
{
pt_entry_t faultoff, *pte;
vm_offset_t pa;
vm_page_t m;
/*
* Convert process and virtual address to physical address.
*/
if (v >= VM_MIN_KERNEL_ADDRESS) {
if (user)
panic("pmap_emulate_reference: user ref to kernel");
pte = vtopte(v);
} else {
#ifdef DIAGNOSTIC
if (p == NULL)
panic("pmap_emulate_reference: bad proc");
if (p->p_vmspace == NULL)
panic("pmap_emulate_reference: bad p_vmspace");
#endif
pte = pmap_lev3pte(p->p_vmspace->vm_map.pmap, v);
}
#ifdef DEBUG /* These checks are more expensive */
if (!pmap_pte_v(pte))
panic("pmap_emulate_reference: invalid pte");
#if 0
/*
* Can't do these, because cpu_fork and cpu_swapin call
* pmap_emulate_reference(), and the bits aren't guaranteed,
* for them...
*/
if (write) {
if (!(*pte & (user ? PG_UWE : PG_UWE | PG_KWE)))
panic("pmap_emulate_reference: write but unwritable");
if (!(*pte & PG_FOW))
panic("pmap_emulate_reference: write but not FOW");
} else {
if (!(*pte & (user ? PG_URE : PG_URE | PG_KRE)))
panic("pmap_emulate_reference: !write but unreadable");
if (!(*pte & (PG_FOR | PG_FOE)))
panic("pmap_emulate_reference: !write but not FOR|FOE");
}
#endif
/* Other diagnostics? */
#endif
pa = pmap_pte_pa(pte);
#ifdef DIAGNOSTIC
if ((*pte & PG_MANAGED) == 0)
panic("pmap_emulate_reference(%p, 0x%lx, %d, %d): pa 0x%lx not managed", p, v, user, write, pa);
#endif
/*
* Twiddle the appropriate bits to reflect the reference
	 * and/or modification.
*
* The rules:
* (1) always mark page as used, and
* (2) if it was a write fault, mark page as modified.
*/
m = PHYS_TO_VM_PAGE(pa);
m->md.pv_flags |= PV_TABLE_REF;
faultoff = PG_FOR | PG_FOE;
vm_page_flag_set(m, PG_REFERENCED);
if (write) {
m->md.pv_flags |= PV_TABLE_MOD;
vm_page_dirty(m);
faultoff |= PG_FOW;
}
pmap_changebit(m, faultoff, FALSE);
if ((*pte & faultoff) != 0) {
#if 1
		/*
		 * XXX dfr - I don't think this is possible in our pmap.
		 */
		/*
		 * This is apparently normal.  Why? -- cgd
		 * XXX because it was being called on unmanaged pages?
		 */
panic("warning: pmap_changebit didn't.");
#endif
*pte &= ~faultoff;
ALPHA_TBIS(v);
}
}
/*
* Miscellaneous support routines follow
*/
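/*
 *	alpha_protection_init:
 *
 *	Build the protection_codes tables which translate each of the
 *	eight VM_PROT_READ/WRITE/EXECUTE combinations into the Alpha
 *	PTE protection bits, for kernel ([0]) and user ([1]) mappings.
 */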
static void
alpha_protection_init()
{
int prot, *kp, *up;
kp = protection_codes[0];
up = protection_codes[1];
for (prot = 0; prot < 8; prot++) {
switch (prot) {
case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
*kp++ = PG_ASM;
*up++ = 0;
break;
case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
*kp++ = PG_ASM | PG_KRE;
*up++ = PG_URE | PG_KRE;
break;
case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
*kp++ = PG_ASM | PG_KWE;
*up++ = PG_UWE | PG_KWE;
break;
case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
*kp++ = PG_ASM | PG_KWE | PG_KRE;
*up++ = PG_UWE | PG_URE | PG_KWE | PG_KRE;
break;
}
}
}
/*
* Map a set of physical memory pages into the kernel virtual
* address space. Return a pointer to where it is mapped. This
* routine is intended to be used for mapping device memory,
* NOT real memory.
*/
void *
pmap_mapdev(pa, size)
vm_offset_t pa;
vm_size_t size;
{
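	/*
	 * Physical addresses are already accessible through the
	 * direct-mapped K0SEG segment, so simply return the K0SEG
	 * alias of the physical address; no new mapping is created.
	 */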
return (void*) ALPHA_PHYS_TO_K0SEG(pa);
}
/*
* perform the pmap work for mincore
*/
int
pmap_mincore(pmap, addr)
pmap_t pmap;
vm_offset_t addr;
{
pt_entry_t *pte;
int val = 0;
pte = pmap_lev3pte(pmap, addr);
if (pte == 0) {
return 0;
}
if (pmap_pte_v(pte)) {
vm_page_t m;
vm_offset_t pa;
val = MINCORE_INCORE;
if ((*pte & PG_MANAGED) == 0)
return val;
pa = pmap_pte_pa(pte);
m = PHYS_TO_VM_PAGE(pa);
/*
* Modified by us
*/
if (m->md.pv_flags & PV_TABLE_MOD)
val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
/*
* Modified by someone
*/
else if (m->dirty || pmap_is_modified(m))
val |= MINCORE_MODIFIED_OTHER;
/*
* Referenced by us
*/
if (m->md.pv_flags & PV_TABLE_REF)
val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
/*
* Referenced by someone
*/
else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) {
val |= MINCORE_REFERENCED_OTHER;
vm_page_flag_set(m, PG_REFERENCED);
}
}
return val;
}
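/*
 *	pmap_activate:
 *
 *	Make the process's pmap the active address space on this CPU:
 *	release any previously active pmap, load the level 1 page table
 *	base into the PCB, obtain a fresh ASN if this pmap's generation
 *	is stale, and switch the hardware context if the process is
 *	curproc.
 */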
void
pmap_activate(struct proc *p)
{
pmap_t pmap;
pmap = vmspace_pmap(p->p_vmspace);
if (pmap_active[PCPU_GET(cpuno)] && pmap != pmap_active[PCPU_GET(cpuno)]) {
atomic_clear_32(&pmap_active[PCPU_GET(cpuno)]->pm_active,
1 << PCPU_GET(cpuno));
pmap_active[PCPU_GET(cpuno)] = 0;
}
p->p_addr->u_pcb.pcb_hw.apcb_ptbr =
ALPHA_K0SEG_TO_PHYS((vm_offset_t) pmap->pm_lev1) >> PAGE_SHIFT;
if (pmap->pm_asn[PCPU_GET(cpuno)].gen != PCPU_GET(current_asngen))
pmap_get_asn(pmap);
pmap_active[PCPU_GET(cpuno)] = pmap;
atomic_set_32(&pmap->pm_active, 1 << PCPU_GET(cpuno));
p->p_addr->u_pcb.pcb_hw.apcb_asn = pmap->pm_asn[PCPU_GET(cpuno)].asn;
if (p == curproc) {
alpha_pal_swpctx((u_long)p->p_md.md_pcbpaddr);
}
}
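/*
 *	pmap_deactivate:
 *
 *	Mark the process's pmap as no longer active on this CPU.
 */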
void
pmap_deactivate(struct proc *p)
{
pmap_t pmap;
pmap = vmspace_pmap(p->p_vmspace);
atomic_clear_32(&pmap->pm_active, 1 << PCPU_GET(cpuno));
pmap_active[PCPU_GET(cpuno)] = 0;
}
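/*
 *	pmap_addr_hint:
 *
 *	Return the given address unchanged; no machine-dependent
 *	alignment adjustment is made on the Alpha.
 */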
vm_offset_t
pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
{
return addr;
}
#if 0
#if defined(PMAP_DEBUG)
int
pmap_pid_dump(int pid)
{
pmap_t pmap;
struct proc *p;
int npte = 0;
int index;
LIST_FOREACH(p, &allproc, p_list) {
if (p->p_pid != pid)
continue;
if (p->p_vmspace) {
int i,j;
index = 0;
pmap = vmspace_pmap(p->p_vmspace);
for(i=0;i<1024;i++) {
pd_entry_t *pde;
pt_entry_t *pte;
unsigned base = i << PDRSHIFT;
pde = &pmap->pm_pdir[i];
if (pde && pmap_pde_v(pde)) {
for(j=0;j<1024;j++) {
unsigned va = base + (j << PAGE_SHIFT);
if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
if (index) {
index = 0;
printf("\n");
}
return npte;
}
pte = pmap_pte_quick( pmap, va);
if (pte && pmap_pte_v(pte)) {
vm_offset_t pa;
vm_page_t m;
pa = *(int *)pte;
m = PHYS_TO_VM_PAGE(pa);
printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
va, pa, m->hold_count, m->wire_count, m->flags);
npte++;
index++;
if (index >= 2) {
index = 0;
printf("\n");
} else {
printf(" ");
}
}
}
}
}
}
}
return npte;
}
#endif
#if defined(DEBUG)
static void pads __P((pmap_t pm));
static void	pmap_pvdump __P((vm_offset_t pa));
/* print address space of pmap*/
static void
pads(pm)
pmap_t pm;
{
int i, j;
vm_offset_t va;
pt_entry_t *ptep;
if (pm == kernel_pmap)
return;
for (i = 0; i < 1024; i++)
if (pm->pm_pdir[i])
for (j = 0; j < 1024; j++) {
va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
if (pm == kernel_pmap && va < KERNBASE)
continue;
if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
continue;
ptep = pmap_pte_quick(pm, va);
if (pmap_pte_v(ptep))
printf("%x:%x ", va, *(int *) ptep);
};
}
static void
pmap_pvdump(pa)
vm_offset_t pa;
{
	pv_entry_t pv;
	vm_page_t m;
printf("pa %x", pa);
m = PHYS_TO_VM_PAGE(pa);
for (pv = TAILQ_FIRST(&m->md.pv_list);
pv;
pv = TAILQ_NEXT(pv, pv_list)) {
#ifdef used_to_be
printf(" -> pmap %x, va %x, flags %x",
pv->pv_pmap, pv->pv_va, pv->pv_flags);
#endif
printf(" -> pmap %x, va %x",
pv->pv_pmap, pv->pv_va);
pads(pv->pv_pmap);
}
printf(" ");
}
#endif
#endif