2008-08-12 19:48:18 +00:00
|
|
|
|
/*-
|
|
|
|
|
* Copyright (c) 1991 Regents of the University of California.
|
|
|
|
|
* All rights reserved.
|
|
|
|
|
* Copyright (c) 1994 John S. Dyson
|
|
|
|
|
* All rights reserved.
|
|
|
|
|
* Copyright (c) 1994 David Greenman
|
|
|
|
|
* All rights reserved.
|
|
|
|
|
* Copyright (c) 2005 Alan L. Cox <alc@cs.rice.edu>
|
|
|
|
|
* All rights reserved.
|
|
|
|
|
*
|
|
|
|
|
* This code is derived from software contributed to Berkeley by
|
|
|
|
|
* the Systems Programming Group of the University of Utah Computer
|
|
|
|
|
* Science Department and William Jolitz of UUNET Technologies Inc.
|
|
|
|
|
*
|
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
|
* are met:
|
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
|
|
|
* must display the following acknowledgement:
|
|
|
|
|
* This product includes software developed by the University of
|
|
|
|
|
* California, Berkeley and its contributors.
|
|
|
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
|
* without specific prior written permission.
|
|
|
|
|
*
|
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
|
*
|
|
|
|
|
* from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
|
|
|
|
|
*/
|
|
|
|
|
/*-
|
|
|
|
|
* Copyright (c) 2003 Networks Associates Technology, Inc.
|
|
|
|
|
* All rights reserved.
|
|
|
|
|
*
|
|
|
|
|
* This software was developed for the FreeBSD Project by Jake Burkholder,
|
|
|
|
|
* Safeport Network Services, and Network Associates Laboratories, the
|
|
|
|
|
* Security Research Division of Network Associates, Inc. under
|
|
|
|
|
* DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
|
|
|
|
|
* CHATS research program.
|
|
|
|
|
*
|
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
|
* are met:
|
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
|
*
|
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include <sys/cdefs.h>
|
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Manages physical address maps.
|
|
|
|
|
*
|
|
|
|
|
* In addition to hardware address maps, this
|
|
|
|
|
* module is called upon to provide software-use-only
|
|
|
|
|
* maps which may or may not be stored in the same
|
|
|
|
|
* form as hardware maps. These pseudo-maps are
|
|
|
|
|
* used to store intermediate results from copy
|
|
|
|
|
* operations to and from address spaces.
|
|
|
|
|
*
|
|
|
|
|
* Since the information managed by this module is
|
|
|
|
|
* also stored by the logical address mapping module,
|
|
|
|
|
* this module may throw away valid virtual-to-physical
|
|
|
|
|
* mappings at almost any time. However, invalidations
|
|
|
|
|
* of virtual-to-physical mappings must be done as
|
|
|
|
|
* requested.
|
|
|
|
|
*
|
|
|
|
|
* In order to cope with hardware architectures which
|
|
|
|
|
* make virtual-to-physical map invalidates expensive,
|
|
|
|
|
* this module may delay invalidate or reduced protection
|
|
|
|
|
* operations until such time as they are actually
|
|
|
|
|
* necessary. This module is given full information as
|
|
|
|
|
* to which processors are currently using which maps,
|
|
|
|
|
* and to when physical maps must be made correct.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include "opt_cpu.h"
|
|
|
|
|
#include "opt_pmap.h"
|
|
|
|
|
#include "opt_smp.h"
|
|
|
|
|
#include "opt_xbox.h"
|
|
|
|
|
|
|
|
|
|
#include <sys/param.h>
|
|
|
|
|
#include <sys/systm.h>
|
|
|
|
|
#include <sys/kernel.h>
|
|
|
|
|
#include <sys/ktr.h>
|
|
|
|
|
#include <sys/lock.h>
|
|
|
|
|
#include <sys/malloc.h>
|
|
|
|
|
#include <sys/mman.h>
|
|
|
|
|
#include <sys/msgbuf.h>
|
|
|
|
|
#include <sys/mutex.h>
|
|
|
|
|
#include <sys/proc.h>
|
2009-07-29 19:38:33 +00:00
|
|
|
|
#include <sys/sf_buf.h>
|
2008-08-12 19:48:18 +00:00
|
|
|
|
#include <sys/sx.h>
|
|
|
|
|
#include <sys/vmmeter.h>
|
|
|
|
|
#include <sys/sched.h>
|
|
|
|
|
#include <sys/sysctl.h>
|
|
|
|
|
#ifdef SMP
|
|
|
|
|
#include <sys/smp.h>
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#include <vm/vm.h>
|
|
|
|
|
#include <vm/vm_param.h>
|
|
|
|
|
#include <vm/vm_kern.h>
|
|
|
|
|
#include <vm/vm_page.h>
|
|
|
|
|
#include <vm/vm_map.h>
|
|
|
|
|
#include <vm/vm_object.h>
|
|
|
|
|
#include <vm/vm_extern.h>
|
|
|
|
|
#include <vm/vm_pageout.h>
|
|
|
|
|
#include <vm/vm_pager.h>
|
|
|
|
|
#include <vm/uma.h>
|
|
|
|
|
|
|
|
|
|
#include <machine/cpu.h>
|
|
|
|
|
#include <machine/cputypes.h>
|
|
|
|
|
#include <machine/md_var.h>
|
|
|
|
|
#include <machine/pcb.h>
|
|
|
|
|
#include <machine/specialreg.h>
|
|
|
|
|
#ifdef SMP
|
|
|
|
|
#include <machine/smp.h>
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#ifdef XBOX
|
|
|
|
|
#include <machine/xbox.h>
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#include <xen/interface/xen.h>
|
2008-12-29 06:31:03 +00:00
|
|
|
|
#include <xen/hypervisor.h>
|
2008-08-12 19:48:18 +00:00
|
|
|
|
#include <machine/xen/hypercall.h>
|
|
|
|
|
#include <machine/xen/xenvar.h>
|
|
|
|
|
#include <machine/xen/xenfunc.h>
|
|
|
|
|
|
|
|
|
|
#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
|
|
|
|
|
#define CPU_ENABLE_SSE
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#ifndef PMAP_SHPGPERPROC
|
|
|
|
|
#define PMAP_SHPGPERPROC 200
|
|
|
|
|
#endif
|
|
|
|
|
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
#define DIAGNOSTIC
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
#if !defined(DIAGNOSTIC)
|
2010-02-18 14:28:38 +00:00
|
|
|
|
#ifdef __GNUC_GNU_INLINE__
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
#define PMAP_INLINE __attribute__((__gnu_inline__)) inline
|
2010-02-18 14:28:38 +00:00
|
|
|
|
#else
|
2010-01-19 15:31:18 +00:00
|
|
|
|
#define PMAP_INLINE extern inline
|
2010-02-18 14:28:38 +00:00
|
|
|
|
#endif
|
2008-08-12 19:48:18 +00:00
|
|
|
|
#else
|
|
|
|
|
#define PMAP_INLINE
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#define PV_STATS
|
|
|
|
|
#ifdef PV_STATS
|
|
|
|
|
#define PV_STAT(x) do { x ; } while (0)
|
|
|
|
|
#else
|
|
|
|
|
#define PV_STAT(x) do { } while (0)
|
|
|
|
|
#endif
|
|
|
|
|
|
2008-08-15 04:00:44 +00:00
|
|
|
|
#define pa_index(pa) ((pa) >> PDRSHIFT)
|
|
|
|
|
#define pa_to_pvh(pa) (&pv_table[pa_index(pa)])
|
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
/*
 * Page-directory and page-table entry accessors for user/kernel
 * address spaces.
 *
 *	pmap_pde(m, v)	address of the entry in m->pm_pdir selected by
 *			virtual address v (index is v >> PDRSHIFT)
 *	pdir_pde(m, v)	the entry of page-directory array m selected by v
 *
 * Every macro argument is parenthesized so that an expression argument
 * such as "ptep + 1" is evaluated as a unit before it is cast,
 * dereferenced or subscripted.
 */
#define pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
#define pdir_pde(m, v)	((m)[(vm_offset_t)(v) >> PDRSHIFT])

/*
 * Flag tests on the entry that "pte" points to.  The entry is read
 * through an int pointer, so only its first int is examined.
 */
#define pmap_pde_v(pte)	((*(int *)(pte) & PG_V) != 0)	/* PG_V set? */
#define pmap_pte_w(pte)	((*(int *)(pte) & PG_W) != 0)	/* PG_W set? */
#define pmap_pte_m(pte)	((*(int *)(pte) & PG_M) != 0)	/* PG_M set? */
#define pmap_pte_u(pte)	((*(int *)(pte) & PG_A) != 0)	/* PG_A set? */
#define pmap_pte_v(pte)	((*(int *)(pte) & PG_V) != 0)	/* PG_V set? */

/*
 * Clear the PG_PROT bits of the entry that "pte" points to, then OR in v.
 * "pte" is evaluated twice, so it must not have side effects.
 */
#define pmap_pte_set_prot(pte, v) \
	((*(int *)(pte) &= ~PG_PROT), (*(int *)(pte) |= (v)))
|
|
|
|
|
|
2011-01-04 15:55:15 +00:00
|
|
|
|
#define HAMFISTED_LOCKING
|
|
|
|
|
#ifdef HAMFISTED_LOCKING
|
|
|
|
|
static struct mtx createdelete_lock;
|
|
|
|
|
#endif
|
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
struct pmap kernel_pmap_store;
|
|
|
|
|
LIST_HEAD(pmaplist, pmap);
|
|
|
|
|
static struct pmaplist allpmaps;
|
|
|
|
|
static struct mtx allpmaps_lock;
|
|
|
|
|
|
|
|
|
|
vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
|
|
|
|
|
vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
|
|
|
|
|
int pgeflag = 0; /* PG_G or-in */
|
|
|
|
|
int pseflag = 0; /* PG_PS or-in */
|
|
|
|
|
|
2008-09-10 07:11:08 +00:00
|
|
|
|
int nkpt;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
vm_offset_t kernel_vm_end;
|
|
|
|
|
extern u_int32_t KERNend;
|
|
|
|
|
|
|
|
|
|
#ifdef PAE
|
|
|
|
|
pt_entry_t pg_nx;
|
|
|
|
|
#endif
|
|
|
|
|
|
2009-09-01 05:15:45 +00:00
|
|
|
|
static int pat_works; /* Is page attribute table sane? */
|
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
/*
|
|
|
|
|
* Data for the pv entry allocation mechanism
|
|
|
|
|
*/
|
|
|
|
|
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
|
2008-08-15 04:00:44 +00:00
|
|
|
|
static struct md_page *pv_table;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
static int shpgperproc = PMAP_SHPGPERPROC;
|
|
|
|
|
|
|
|
|
|
struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */
|
|
|
|
|
int pv_maxchunks; /* How many chunks we have KVA for */
|
|
|
|
|
vm_offset_t pv_vafree; /* freelist stored in the PTE */
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* All those kernel PT submaps that BSD is so fond of
|
|
|
|
|
*/
|
|
|
|
|
struct sysmaps {
|
|
|
|
|
struct mtx lock;
|
|
|
|
|
pt_entry_t *CMAP1;
|
|
|
|
|
pt_entry_t *CMAP2;
|
|
|
|
|
caddr_t CADDR1;
|
|
|
|
|
caddr_t CADDR2;
|
|
|
|
|
};
|
|
|
|
|
static struct sysmaps sysmaps_pcpu[MAXCPU];
|
|
|
|
|
static pt_entry_t *CMAP3;
|
2010-02-21 01:13:34 +00:00
|
|
|
|
caddr_t ptvmmap = 0;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
static caddr_t CADDR3;
|
|
|
|
|
struct msgbuf *msgbufp = 0;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Crashdump maps.
|
|
|
|
|
*/
|
|
|
|
|
static caddr_t crashdumpmap;
|
|
|
|
|
|
|
|
|
|
static pt_entry_t *PMAP1 = 0, *PMAP2;
|
|
|
|
|
static pt_entry_t *PADDR1 = 0, *PADDR2;
|
|
|
|
|
#ifdef SMP
|
|
|
|
|
static int PMAP1cpu;
|
|
|
|
|
static int PMAP1changedcpu;
|
|
|
|
|
SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD,
|
|
|
|
|
&PMAP1changedcpu, 0,
|
|
|
|
|
"Number of times pmap_pte_quick changed CPU with same PMAP1");
|
|
|
|
|
#endif
|
|
|
|
|
static int PMAP1changed;
|
|
|
|
|
SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD,
|
|
|
|
|
&PMAP1changed, 0,
|
|
|
|
|
"Number of times pmap_pte_quick changed PMAP1");
|
|
|
|
|
static int PMAP1unchanged;
|
|
|
|
|
SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
|
|
|
|
|
&PMAP1unchanged, 0,
|
|
|
|
|
"Number of times pmap_pte_quick didn't change PMAP1");
|
|
|
|
|
static struct mtx PMAP2mutex;
|
|
|
|
|
|
2008-08-15 04:00:44 +00:00
|
|
|
|
SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
|
|
|
|
|
static int pg_ps_enabled;
|
2009-11-11 14:21:31 +00:00
|
|
|
|
SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0,
|
2008-08-15 04:00:44 +00:00
|
|
|
|
"Are large page mappings enabled?");
|
|
|
|
|
|
|
|
|
|
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
|
|
|
|
|
"Max number of PV entries");
|
|
|
|
|
SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
|
|
|
|
|
"Page share factor per proc");
|
2010-04-30 03:26:12 +00:00
|
|
|
|
SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
|
|
|
|
|
"2/4MB page mapping counters");
|
|
|
|
|
|
|
|
|
|
static u_long pmap_pde_mappings;
|
|
|
|
|
SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
|
|
|
|
|
&pmap_pde_mappings, 0, "2/4MB page mappings");
|
2008-08-15 04:00:44 +00:00
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
|
|
|
|
|
static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try);
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
|
|
|
|
|
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
|
|
|
|
|
vm_offset_t va);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
|
|
|
|
static vm_page_t pmap_enter_quick_locked(multicall_entry_t **mcl, int *count, pmap_t pmap, vm_offset_t va,
|
|
|
|
|
vm_page_t m, vm_prot_t prot, vm_page_t mpte);
|
|
|
|
|
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
|
|
|
|
|
vm_page_t *free);
|
|
|
|
|
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
|
|
|
|
|
vm_page_t *free);
|
|
|
|
|
static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
|
|
|
|
|
vm_offset_t va);
|
|
|
|
|
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
|
|
|
|
|
vm_page_t m);
|
|
|
|
|
|
|
|
|
|
static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
|
|
|
|
|
|
|
|
|
|
static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
|
|
|
|
|
static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free);
|
|
|
|
|
static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
|
|
|
|
|
static void pmap_pte_release(pt_entry_t *pte);
|
|
|
|
|
static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
|
|
|
|
|
static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
|
|
|
|
|
static boolean_t pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr);
|
2008-08-15 04:00:44 +00:00
|
|
|
|
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
|
|
|
|
|
|
2009-09-01 03:44:25 +00:00
|
|
|
|
static __inline void pagezero(void *page);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
|
|
|
|
CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
|
|
|
|
|
CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If you get an error here, then you set KVA_PAGES wrong! See the
|
|
|
|
|
* description of KVA_PAGES in sys/i386/include/pmap.h. It must be
|
|
|
|
|
* a multiple of 4 for a normal kernel, or a multiple of 8 for a PAE kernel.
|
|
|
|
|
*/
|
|
|
|
|
CTASSERT(KERNBASE % (1 << 24) == 0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type)
|
|
|
|
|
{
|
|
|
|
|
vm_paddr_t pdir_ma = vtomach(&pmap->pm_pdir[ptepindex]);
|
|
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
|
case SH_PD_SET_VA:
|
|
|
|
|
#if 0
|
|
|
|
|
xen_queue_pt_update(shadow_pdir_ma,
|
|
|
|
|
xpmap_ptom(val & ~(PG_RW)));
|
|
|
|
|
#endif
|
|
|
|
|
xen_queue_pt_update(pdir_ma,
|
|
|
|
|
xpmap_ptom(val));
|
|
|
|
|
break;
|
|
|
|
|
case SH_PD_SET_VA_MA:
|
|
|
|
|
#if 0
|
|
|
|
|
xen_queue_pt_update(shadow_pdir_ma,
|
|
|
|
|
val & ~(PG_RW));
|
|
|
|
|
#endif
|
|
|
|
|
xen_queue_pt_update(pdir_ma, val);
|
|
|
|
|
break;
|
|
|
|
|
case SH_PD_SET_VA_CLEAR:
|
|
|
|
|
#if 0
|
|
|
|
|
xen_queue_pt_update(shadow_pdir_ma, 0);
|
|
|
|
|
#endif
|
|
|
|
|
xen_queue_pt_update(pdir_ma, 0);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Move the kernel virtual free pointer to the next
|
|
|
|
|
* 4MB. This is used to help improve performance
|
|
|
|
|
* by using a large (4MB) page for much of the kernel
|
|
|
|
|
* (.text, .data, .bss)
|
|
|
|
|
*/
|
|
|
|
|
static vm_offset_t
|
|
|
|
|
pmap_kmem_choose(vm_offset_t addr)
|
|
|
|
|
{
|
|
|
|
|
vm_offset_t newaddr = addr;
|
|
|
|
|
|
|
|
|
|
#ifndef DISABLE_PSE
|
|
|
|
|
if (cpu_feature & CPUID_PSE)
|
|
|
|
|
newaddr = (addr + PDRMASK) & ~PDRMASK;
|
|
|
|
|
#endif
|
|
|
|
|
return newaddr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Bootstrap the system enough to run with virtual memory.
|
|
|
|
|
*
|
|
|
|
|
* On the i386 this is called after mapping has already been enabled
|
|
|
|
|
* and just syncs the pmap module with what has already been done.
|
|
|
|
|
* [We can't call it easily with mapping off since the kernel is not
|
|
|
|
|
* mapped with PA == VA, hence we would have to relocate every address
|
|
|
|
|
* from the linked base (virtual) address "KERNBASE" to the actual
|
|
|
|
|
* (physical) address starting relative to 0]
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_bootstrap(vm_paddr_t firstaddr)
|
|
|
|
|
{
|
|
|
|
|
vm_offset_t va;
|
|
|
|
|
pt_entry_t *pte, *unused;
|
|
|
|
|
struct sysmaps *sysmaps;
|
|
|
|
|
int i;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
|
|
|
|
|
* large. It should instead be correctly calculated in locore.s and
|
|
|
|
|
* not based on 'first' (which is a physical address, not a virtual
|
|
|
|
|
* address, for the start of unused physical memory). The kernel
|
|
|
|
|
* page tables are NOT double mapped and thus should not be included
|
|
|
|
|
* in this calculation.
|
|
|
|
|
*/
|
|
|
|
|
virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
|
|
|
|
|
virtual_avail = pmap_kmem_choose(virtual_avail);
|
|
|
|
|
|
|
|
|
|
virtual_end = VM_MAX_KERNEL_ADDRESS;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Initialize the kernel pmap (which is statically allocated).
|
|
|
|
|
*/
|
|
|
|
|
PMAP_LOCK_INIT(kernel_pmap);
|
|
|
|
|
kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
|
|
|
|
|
#ifdef PAE
|
|
|
|
|
kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
|
|
|
|
|
#endif
|
Commit the support for removing cpumask_t and replacing it directly with
cpuset_t objects.
That is going to offer the underlying support for a simple bump of
MAXCPU and then support for number of cpus > 32 (as it is today).
Right now, cpumask_t is an int, 32 bits on all our supported architecture.
cpumask_t on the other side is implemented as an array of longs, and
easilly extendible by definition.
The architectures touched by this commit are the following:
- amd64
- i386
- pc98
- arm
- ia64
- XEN
while the others are still missing.
Userland is believed to be fully converted with the changes contained
here.
Some technical notes:
- This commit may be considered an ABI nop for all the architectures
different from amd64 and ia64 (and sparc64 in the future)
- per-cpu members, which are now converted to cpuset_t, needs to be
accessed avoiding migration, because the size of cpuset_t should be
considered unknown
- size of cpuset_t objects is different from kernel and userland (this is
primirally done in order to leave some more space in userland to cope
with KBI extensions). If you need to access kernel cpuset_t from the
userland please refer to example in this patch on how to do that
correctly (kgdb may be a good source, for example).
- Support for other architectures is going to be added soon
- Only MAXCPU for amd64 is bumped now
The patch has been tested by sbruno and Nicholas Esborn on opteron
4 x 12 pack CPUs. More testing on big SMP is expected to came soon.
pluknet tested the patch with his 8-ways on both amd64 and i386.
Tested by: pluknet, sbruno, gianni, Nicholas Esborn
Reviewed by: jeff, jhb, sbruno
2011-05-05 14:39:14 +00:00
|
|
|
|
CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
|
2008-08-12 19:48:18 +00:00
|
|
|
|
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
|
|
|
|
|
LIST_INIT(&allpmaps);
|
|
|
|
|
mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
|
|
|
|
|
mtx_lock_spin(&allpmaps_lock);
|
|
|
|
|
LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
|
|
|
|
|
mtx_unlock_spin(&allpmaps_lock);
|
2008-09-25 07:03:09 +00:00
|
|
|
|
if (nkpt == 0)
|
|
|
|
|
nkpt = NKPT;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Reserve some special page table entries/VA space for temporary
|
|
|
|
|
* mapping of pages.
|
|
|
|
|
*/
|
|
|
|
|
#define SYSMAP(c, p, v, n) \
|
|
|
|
|
v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
|
|
|
|
|
|
|
|
|
|
va = virtual_avail;
|
|
|
|
|
pte = vtopte(va);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* CMAP1/CMAP2 are used for zeroing and copying pages.
|
|
|
|
|
* CMAP3 is used for the idle process page zeroing.
|
|
|
|
|
*/
|
|
|
|
|
for (i = 0; i < MAXCPU; i++) {
|
|
|
|
|
sysmaps = &sysmaps_pcpu[i];
|
|
|
|
|
mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF);
|
|
|
|
|
SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1)
|
|
|
|
|
SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1)
|
2010-02-21 01:13:34 +00:00
|
|
|
|
PT_SET_MA(sysmaps->CADDR1, 0);
|
|
|
|
|
PT_SET_MA(sysmaps->CADDR2, 0);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
SYSMAP(caddr_t, CMAP3, CADDR3, 1)
|
|
|
|
|
PT_SET_MA(CADDR3, 0);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Crashdump maps.
|
|
|
|
|
*/
|
|
|
|
|
SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* ptvmmap is used for reading arbitrary physical pages via /dev/mem.
|
|
|
|
|
*/
|
|
|
|
|
SYSMAP(caddr_t, unused, ptvmmap, 1)
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* msgbufp is used to map the system message buffer.
|
|
|
|
|
*/
|
2011-01-21 10:26:26 +00:00
|
|
|
|
SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(msgbufsize)))
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* ptemap is used for pmap_pte_quick
|
|
|
|
|
*/
|
|
|
|
|
SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1);
|
|
|
|
|
SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1);
|
|
|
|
|
|
|
|
|
|
mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);
|
|
|
|
|
|
|
|
|
|
virtual_avail = va;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Leave in place an identity mapping (virt == phys) for the low 1 MB
|
|
|
|
|
* physical memory region that is used by the ACPI wakeup code. This
|
|
|
|
|
* mapping must not have PG_G set.
|
|
|
|
|
*/
|
|
|
|
|
#ifndef XEN
|
|
|
|
|
/*
|
|
|
|
|
* leave here deliberately to show that this is not supported
|
|
|
|
|
*/
|
|
|
|
|
#ifdef XBOX
|
|
|
|
|
/* FIXME: This is gross, but needed for the XBOX. Since we are in such
|
|
|
|
|
* an early stadium, we cannot yet neatly map video memory ... :-(
|
|
|
|
|
* Better fixes are very welcome! */
|
|
|
|
|
if (!arch_i386_is_xbox)
|
|
|
|
|
#endif
|
|
|
|
|
for (i = 1; i < NKPT; i++)
|
|
|
|
|
PTD[i] = 0;
|
|
|
|
|
|
|
|
|
|
/* Initialize the PAT MSR if present. */
|
|
|
|
|
pmap_init_pat();
|
|
|
|
|
|
|
|
|
|
/* Turn on PG_G on kernel page(s) */
|
|
|
|
|
pmap_set_pg();
|
|
|
|
|
#endif
|
2011-01-04 15:55:15 +00:00
|
|
|
|
|
|
|
|
|
#ifdef HAMFISTED_LOCKING
|
|
|
|
|
mtx_init(&createdelete_lock, "pmap create/delete", NULL, MTX_DEF);
|
|
|
|
|
#endif
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Setup the PAT MSR.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_init_pat(void)
|
|
|
|
|
{
|
|
|
|
|
uint64_t pat_msr;
|
|
|
|
|
|
|
|
|
|
/* Bail if this CPU doesn't implement PAT. */
|
|
|
|
|
if (!(cpu_feature & CPUID_PAT))
|
|
|
|
|
return;
|
|
|
|
|
|
2009-09-01 05:15:45 +00:00
|
|
|
|
if (cpu_vendor_id != CPU_VENDOR_INTEL ||
|
2009-09-10 17:27:36 +00:00
|
|
|
|
(CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe)) {
|
2009-09-01 05:15:45 +00:00
|
|
|
|
/*
|
|
|
|
|
* Leave the indices 0-3 at the default of WB, WT, UC, and UC-.
|
|
|
|
|
* Program 4 and 5 as WP and WC.
|
|
|
|
|
* Leave 6 and 7 as UC and UC-.
|
|
|
|
|
*/
|
|
|
|
|
pat_msr = rdmsr(MSR_PAT);
|
|
|
|
|
pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5));
|
|
|
|
|
pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) |
|
|
|
|
|
PAT_VALUE(5, PAT_WRITE_COMBINING);
|
|
|
|
|
pat_works = 1;
|
|
|
|
|
} else {
|
|
|
|
|
/*
|
|
|
|
|
* Due to some Intel errata, we can only safely use the lower 4
|
|
|
|
|
* PAT entries. Thus, just replace PAT Index 2 with WC instead
|
|
|
|
|
* of UC-.
|
|
|
|
|
*
|
|
|
|
|
* Intel Pentium III Processor Specification Update
|
|
|
|
|
* Errata E.27 (Upper Four PAT Entries Not Usable With Mode B
|
|
|
|
|
* or Mode C Paging)
|
|
|
|
|
*
|
|
|
|
|
* Intel Pentium IV Processor Specification Update
|
|
|
|
|
* Errata N46 (PAT Index MSB May Be Calculated Incorrectly)
|
|
|
|
|
*/
|
|
|
|
|
pat_msr = rdmsr(MSR_PAT);
|
|
|
|
|
pat_msr &= ~PAT_MASK(2);
|
|
|
|
|
pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING);
|
|
|
|
|
pat_works = 0;
|
|
|
|
|
}
|
2008-08-12 19:48:18 +00:00
|
|
|
|
wrmsr(MSR_PAT, pat_msr);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Initialize a vm_page's machine-dependent fields.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_page_init(vm_page_t m)
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
TAILQ_INIT(&m->md.pv_list);
|
2009-07-12 23:31:20 +00:00
|
|
|
|
m->md.pat_mode = PAT_WRITE_BACK;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* ABuse the pte nodes for unmapped kva to thread a kva freelist through.
|
|
|
|
|
* Requirements:
|
|
|
|
|
* - Must deal with pages in order to ensure that none of the PG_* bits
|
|
|
|
|
* are ever set, PG_V in particular.
|
|
|
|
|
* - Assumes we can write to ptes without pte_store() atomic ops, even
|
|
|
|
|
* on PAE systems. This should be ok.
|
|
|
|
|
* - Assumes nothing will ever test these addresses for 0 to indicate
|
|
|
|
|
* no mapping instead of correctly checking PG_V.
|
|
|
|
|
* - Assumes a vm_offset_t will fit in a pte (true for i386).
|
|
|
|
|
* Because PG_V is never set, there can be no mappings to invalidate.
|
|
|
|
|
*/
|
|
|
|
|
static int ptelist_count = 0;
|
|
|
|
|
static vm_offset_t
|
|
|
|
|
pmap_ptelist_alloc(vm_offset_t *head)
|
|
|
|
|
{
|
|
|
|
|
vm_offset_t va;
|
|
|
|
|
vm_offset_t *phead = (vm_offset_t *)*head;
|
|
|
|
|
|
|
|
|
|
if (ptelist_count == 0) {
|
|
|
|
|
printf("out of memory!!!!!!\n");
|
|
|
|
|
return (0); /* Out of memory */
|
|
|
|
|
}
|
|
|
|
|
ptelist_count--;
|
|
|
|
|
va = phead[ptelist_count];
|
|
|
|
|
return (va);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
pmap_ptelist_free(vm_offset_t *head, vm_offset_t va)
|
|
|
|
|
{
|
|
|
|
|
vm_offset_t *phead = (vm_offset_t *)*head;
|
|
|
|
|
|
|
|
|
|
phead[ptelist_count++] = va;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
pmap_ptelist_init(vm_offset_t *head, void *base, int npages)
|
|
|
|
|
{
|
|
|
|
|
int i, nstackpages;
|
|
|
|
|
vm_offset_t va;
|
|
|
|
|
vm_page_t m;
|
|
|
|
|
|
|
|
|
|
nstackpages = (npages + PAGE_SIZE/sizeof(vm_offset_t) - 1)/ (PAGE_SIZE/sizeof(vm_offset_t));
|
|
|
|
|
for (i = 0; i < nstackpages; i++) {
|
|
|
|
|
va = (vm_offset_t)base + i * PAGE_SIZE;
|
|
|
|
|
m = vm_page_alloc(NULL, i,
|
|
|
|
|
VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
|
|
|
|
|
VM_ALLOC_ZERO);
|
|
|
|
|
pmap_qenter(va, &m, 1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*head = (vm_offset_t)base;
|
|
|
|
|
for (i = npages - 1; i >= nstackpages; i--) {
|
|
|
|
|
va = (vm_offset_t)base + i * PAGE_SIZE;
|
|
|
|
|
pmap_ptelist_free(head, va);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Initialize the pmap module.
|
|
|
|
|
* Called by vm_init, to initialize any structures that the pmap
|
|
|
|
|
* system needs to map virtual memory.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_init(void)
|
|
|
|
|
{
|
2008-08-15 04:00:44 +00:00
|
|
|
|
vm_page_t mpte;
|
|
|
|
|
vm_size_t s;
|
|
|
|
|
int i, pv_npg;
|
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
/*
|
2008-08-15 04:00:44 +00:00
|
|
|
|
* Initialize the vm page array entries for the kernel pmap's
|
|
|
|
|
* page table pages.
|
|
|
|
|
*/
|
|
|
|
|
for (i = 0; i < nkpt; i++) {
|
2008-08-17 23:36:52 +00:00
|
|
|
|
mpte = PHYS_TO_VM_PAGE(xpmap_mtop(PTD[i + KPTDI] & PG_FRAME));
|
2008-08-15 04:00:44 +00:00
|
|
|
|
KASSERT(mpte >= vm_page_array &&
|
|
|
|
|
mpte < &vm_page_array[vm_page_array_size],
|
|
|
|
|
("pmap_init: page table page is out of range"));
|
|
|
|
|
mpte->pindex = i + KPTDI;
|
2008-08-17 23:36:52 +00:00
|
|
|
|
mpte->phys_addr = xpmap_mtop(PTD[i + KPTDI] & PG_FRAME);
|
2008-08-15 04:00:44 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
2008-08-12 19:48:18 +00:00
|
|
|
|
* Initialize the address space (zone) for the pv entries. Set a
|
|
|
|
|
* high water mark so that the system can recover from excessive
|
|
|
|
|
* numbers of pv entries.
|
|
|
|
|
*/
|
|
|
|
|
TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
|
|
|
|
|
pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
|
|
|
|
|
TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
|
|
|
|
|
pv_entry_max = roundup(pv_entry_max, _NPCPV);
|
|
|
|
|
pv_entry_high_water = 9 * (pv_entry_max / 10);
|
|
|
|
|
|
2008-08-15 04:00:44 +00:00
|
|
|
|
/*
|
|
|
|
|
* Are large page mappings enabled?
|
|
|
|
|
*/
|
|
|
|
|
TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Calculate the size of the pv head table for superpages.
|
|
|
|
|
*/
|
|
|
|
|
for (i = 0; phys_avail[i + 1]; i += 2);
|
|
|
|
|
pv_npg = round_4mpage(phys_avail[(i - 2) + 1]) / NBPDR;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Allocate memory for the pv head table for superpages.
|
|
|
|
|
*/
|
|
|
|
|
s = (vm_size_t)(pv_npg * sizeof(struct md_page));
|
|
|
|
|
s = round_page(s);
|
|
|
|
|
pv_table = (struct md_page *)kmem_alloc(kernel_map, s);
|
|
|
|
|
for (i = 0; i < pv_npg; i++)
|
|
|
|
|
TAILQ_INIT(&pv_table[i].pv_list);
|
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
|
|
|
|
|
pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map,
|
|
|
|
|
PAGE_SIZE * pv_maxchunks);
|
|
|
|
|
if (pv_chunkbase == NULL)
|
|
|
|
|
panic("pmap_init: not enough kvm for pv chunks");
|
|
|
|
|
pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/***************************************************
|
|
|
|
|
* Low level helper routines.....
|
|
|
|
|
***************************************************/
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Determine the appropriate bits to set in a PTE or PDE for a specified
|
|
|
|
|
* caching mode.
|
|
|
|
|
*/
|
2009-07-29 19:38:33 +00:00
|
|
|
|
int
|
2008-08-12 19:48:18 +00:00
|
|
|
|
pmap_cache_bits(int mode, boolean_t is_pde)
|
|
|
|
|
{
|
|
|
|
|
int pat_flag, pat_index, cache_bits;
|
|
|
|
|
|
|
|
|
|
/* The PAT bit is different for PTE's and PDE's. */
|
|
|
|
|
pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT;
|
|
|
|
|
|
|
|
|
|
/* If we don't support PAT, map extended modes to older ones. */
|
|
|
|
|
if (!(cpu_feature & CPUID_PAT)) {
|
|
|
|
|
switch (mode) {
|
|
|
|
|
case PAT_UNCACHEABLE:
|
|
|
|
|
case PAT_WRITE_THROUGH:
|
|
|
|
|
case PAT_WRITE_BACK:
|
|
|
|
|
break;
|
|
|
|
|
case PAT_UNCACHED:
|
|
|
|
|
case PAT_WRITE_COMBINING:
|
|
|
|
|
case PAT_WRITE_PROTECTED:
|
|
|
|
|
mode = PAT_UNCACHEABLE;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Map the caching mode to a PAT index. */
|
2009-09-01 05:15:45 +00:00
|
|
|
|
if (pat_works) {
|
|
|
|
|
switch (mode) {
|
|
|
|
|
case PAT_UNCACHEABLE:
|
|
|
|
|
pat_index = 3;
|
|
|
|
|
break;
|
|
|
|
|
case PAT_WRITE_THROUGH:
|
|
|
|
|
pat_index = 1;
|
|
|
|
|
break;
|
|
|
|
|
case PAT_WRITE_BACK:
|
|
|
|
|
pat_index = 0;
|
|
|
|
|
break;
|
|
|
|
|
case PAT_UNCACHED:
|
|
|
|
|
pat_index = 2;
|
|
|
|
|
break;
|
|
|
|
|
case PAT_WRITE_COMBINING:
|
|
|
|
|
pat_index = 5;
|
|
|
|
|
break;
|
|
|
|
|
case PAT_WRITE_PROTECTED:
|
|
|
|
|
pat_index = 4;
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
panic("Unknown caching mode %d\n", mode);
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
switch (mode) {
|
|
|
|
|
case PAT_UNCACHED:
|
|
|
|
|
case PAT_UNCACHEABLE:
|
|
|
|
|
case PAT_WRITE_PROTECTED:
|
|
|
|
|
pat_index = 3;
|
|
|
|
|
break;
|
|
|
|
|
case PAT_WRITE_THROUGH:
|
|
|
|
|
pat_index = 1;
|
|
|
|
|
break;
|
|
|
|
|
case PAT_WRITE_BACK:
|
|
|
|
|
pat_index = 0;
|
|
|
|
|
break;
|
|
|
|
|
case PAT_WRITE_COMBINING:
|
|
|
|
|
pat_index = 2;
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
panic("Unknown caching mode %d\n", mode);
|
|
|
|
|
}
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
|
|
|
|
|
cache_bits = 0;
|
|
|
|
|
if (pat_index & 0x4)
|
|
|
|
|
cache_bits |= pat_flag;
|
|
|
|
|
if (pat_index & 0x2)
|
|
|
|
|
cache_bits |= PG_NC_PCD;
|
|
|
|
|
if (pat_index & 0x1)
|
|
|
|
|
cache_bits |= PG_NC_PWT;
|
|
|
|
|
return (cache_bits);
|
|
|
|
|
}
|
|
|
|
|
#ifdef SMP
|
|
|
|
|
/*
|
|
|
|
|
* For SMP, these functions have to use the IPI mechanism for coherence.
|
|
|
|
|
*
|
|
|
|
|
* N.B.: Before calling any of the following TLB invalidation functions,
|
|
|
|
|
* the calling processor must ensure that all stores updating a non-
|
|
|
|
|
* kernel page table are globally performed. Otherwise, another
|
|
|
|
|
* processor could cache an old, pre-update entry without being
|
|
|
|
|
* invalidated. This can happen one of two ways: (1) The pmap becomes
|
|
|
|
|
* active on another processor after its pm_active field is checked by
|
|
|
|
|
* one of the following functions but before a store updating the page
|
|
|
|
|
* table is globally performed. (2) The pmap becomes active on another
|
|
|
|
|
* processor before its pm_active field is checked but due to
|
|
|
|
|
* speculative loads one of the following functions still reads the
|
|
|
|
|
* pmap as inactive on the other processor.
|
|
|
|
|
*
|
|
|
|
|
* The kernel page table is exempt because its pm_active field is
|
|
|
|
|
* immutable. The kernel page table is always active on every
|
|
|
|
|
* processor.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
|
|
|
|
|
{
|
2011-06-28 13:13:06 +00:00
|
|
|
|
cpuset_t other_cpus;
|
|
|
|
|
u_int cpuid;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
|
|
|
|
CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
|
|
|
|
|
pmap, va);
|
|
|
|
|
|
|
|
|
|
sched_pin();
|
Commit the support for removing cpumask_t and replacing it directly with
cpuset_t objects.
That is going to offer the underlying support for a simple bump of
MAXCPU and then support for number of cpus > 32 (as it is today).
Right now, cpumask_t is an int, 32 bits on all our supported architecture.
cpumask_t on the other side is implemented as an array of longs, and
easilly extendible by definition.
The architectures touched by this commit are the following:
- amd64
- i386
- pc98
- arm
- ia64
- XEN
while the others are still missing.
Userland is believed to be fully converted with the changes contained
here.
Some technical notes:
- This commit may be considered an ABI nop for all the architectures
different from amd64 and ia64 (and sparc64 in the future)
- per-cpu members, which are now converted to cpuset_t, needs to be
accessed avoiding migration, because the size of cpuset_t should be
considered unknown
- size of cpuset_t objects is different from kernel and userland (this is
primirally done in order to leave some more space in userland to cope
with KBI extensions). If you need to access kernel cpuset_t from the
userland please refer to example in this patch on how to do that
correctly (kgdb may be a good source, for example).
- Support for other architectures is going to be added soon
- Only MAXCPU for amd64 is bumped now
The patch has been tested by sbruno and Nicholas Esborn on opteron
4 x 12 pack CPUs. More testing on big SMP is expected to came soon.
pluknet tested the patch with his 8-ways on both amd64 and i386.
Tested by: pluknet, sbruno, gianni, Nicholas Esborn
Reviewed by: jeff, jhb, sbruno
2011-05-05 14:39:14 +00:00
|
|
|
|
if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
|
2008-08-12 19:48:18 +00:00
|
|
|
|
invlpg(va);
|
|
|
|
|
smp_invlpg(va);
|
|
|
|
|
} else {
|
2011-06-28 13:13:06 +00:00
|
|
|
|
cpuid = PCPU_GET(cpuid);
|
2011-06-22 20:04:39 +00:00
|
|
|
|
other_cpus = all_cpus;
|
2011-06-28 13:13:06 +00:00
|
|
|
|
CPU_CLR(cpuid, &other_cpus);
|
|
|
|
|
if (CPU_ISSET(cpuid, &pmap->pm_active))
|
2008-08-12 19:48:18 +00:00
|
|
|
|
invlpg(va);
|
Commit the support for removing cpumask_t and replacing it directly with
cpuset_t objects.
That is going to offer the underlying support for a simple bump of
MAXCPU and then support for number of cpus > 32 (as it is today).
Right now, cpumask_t is an int, 32 bits on all our supported architecture.
cpumask_t on the other side is implemented as an array of longs, and
easilly extendible by definition.
The architectures touched by this commit are the following:
- amd64
- i386
- pc98
- arm
- ia64
- XEN
while the others are still missing.
Userland is believed to be fully converted with the changes contained
here.
Some technical notes:
- This commit may be considered an ABI nop for all the architectures
different from amd64 and ia64 (and sparc64 in the future)
- per-cpu members, which are now converted to cpuset_t, needs to be
accessed avoiding migration, because the size of cpuset_t should be
considered unknown
- size of cpuset_t objects is different from kernel and userland (this is
primirally done in order to leave some more space in userland to cope
with KBI extensions). If you need to access kernel cpuset_t from the
userland please refer to example in this patch on how to do that
correctly (kgdb may be a good source, for example).
- Support for other architectures is going to be added soon
- Only MAXCPU for amd64 is bumped now
The patch has been tested by sbruno and Nicholas Esborn on opteron
4 x 12 pack CPUs. More testing on big SMP is expected to came soon.
pluknet tested the patch with his 8-ways on both amd64 and i386.
Tested by: pluknet, sbruno, gianni, Nicholas Esborn
Reviewed by: jeff, jhb, sbruno
2011-05-05 14:39:14 +00:00
|
|
|
|
CPU_AND(&other_cpus, &pmap->pm_active);
|
|
|
|
|
if (!CPU_EMPTY(&other_cpus))
|
|
|
|
|
smp_masked_invlpg(other_cpus, va);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
sched_unpin();
|
|
|
|
|
PT_UPDATES_FLUSH();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
|
|
|
|
|
{
|
2011-06-28 13:13:06 +00:00
|
|
|
|
cpuset_t other_cpus;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
vm_offset_t addr;
|
2011-06-28 13:13:06 +00:00
|
|
|
|
u_int cpuid;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
|
|
|
|
CTR3(KTR_PMAP, "pmap_invalidate_page: pmap=%p eva=0x%x sva=0x%x",
|
|
|
|
|
pmap, sva, eva);
|
|
|
|
|
|
|
|
|
|
sched_pin();
|
Commit the support for removing cpumask_t and replacing it directly with
cpuset_t objects.
That is going to offer the underlying support for a simple bump of
MAXCPU and then support for number of cpus > 32 (as it is today).
Right now, cpumask_t is an int, 32 bits on all our supported architecture.
cpumask_t on the other side is implemented as an array of longs, and
easilly extendible by definition.
The architectures touched by this commit are the following:
- amd64
- i386
- pc98
- arm
- ia64
- XEN
while the others are still missing.
Userland is believed to be fully converted with the changes contained
here.
Some technical notes:
- This commit may be considered an ABI nop for all the architectures
different from amd64 and ia64 (and sparc64 in the future)
- per-cpu members, which are now converted to cpuset_t, needs to be
accessed avoiding migration, because the size of cpuset_t should be
considered unknown
- size of cpuset_t objects is different from kernel and userland (this is
primirally done in order to leave some more space in userland to cope
with KBI extensions). If you need to access kernel cpuset_t from the
userland please refer to example in this patch on how to do that
correctly (kgdb may be a good source, for example).
- Support for other architectures is going to be added soon
- Only MAXCPU for amd64 is bumped now
The patch has been tested by sbruno and Nicholas Esborn on opteron
4 x 12 pack CPUs. More testing on big SMP is expected to came soon.
pluknet tested the patch with his 8-ways on both amd64 and i386.
Tested by: pluknet, sbruno, gianni, Nicholas Esborn
Reviewed by: jeff, jhb, sbruno
2011-05-05 14:39:14 +00:00
|
|
|
|
if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
|
2008-08-12 19:48:18 +00:00
|
|
|
|
for (addr = sva; addr < eva; addr += PAGE_SIZE)
|
|
|
|
|
invlpg(addr);
|
|
|
|
|
smp_invlpg_range(sva, eva);
|
|
|
|
|
} else {
|
2011-06-28 13:13:06 +00:00
|
|
|
|
cpuid = PCPU_GET(cpuid);
|
2011-06-22 20:04:39 +00:00
|
|
|
|
other_cpus = all_cpus;
|
2011-06-28 13:13:06 +00:00
|
|
|
|
CPU_CLR(cpuid, &other_cpus);
|
|
|
|
|
if (CPU_ISSET(cpuid, &pmap->pm_active))
|
2008-08-12 19:48:18 +00:00
|
|
|
|
for (addr = sva; addr < eva; addr += PAGE_SIZE)
|
|
|
|
|
invlpg(addr);
|
Commit the support for removing cpumask_t and replacing it directly with
cpuset_t objects.
That is going to offer the underlying support for a simple bump of
MAXCPU and then support for number of cpus > 32 (as it is today).
Right now, cpumask_t is an int, 32 bits on all our supported architecture.
cpumask_t on the other side is implemented as an array of longs, and
easilly extendible by definition.
The architectures touched by this commit are the following:
- amd64
- i386
- pc98
- arm
- ia64
- XEN
while the others are still missing.
Userland is believed to be fully converted with the changes contained
here.
Some technical notes:
- This commit may be considered an ABI nop for all the architectures
different from amd64 and ia64 (and sparc64 in the future)
- per-cpu members, which are now converted to cpuset_t, needs to be
accessed avoiding migration, because the size of cpuset_t should be
considered unknown
- size of cpuset_t objects is different from kernel and userland (this is
primirally done in order to leave some more space in userland to cope
with KBI extensions). If you need to access kernel cpuset_t from the
userland please refer to example in this patch on how to do that
correctly (kgdb may be a good source, for example).
- Support for other architectures is going to be added soon
- Only MAXCPU for amd64 is bumped now
The patch has been tested by sbruno and Nicholas Esborn on opteron
4 x 12 pack CPUs. More testing on big SMP is expected to came soon.
pluknet tested the patch with his 8-ways on both amd64 and i386.
Tested by: pluknet, sbruno, gianni, Nicholas Esborn
Reviewed by: jeff, jhb, sbruno
2011-05-05 14:39:14 +00:00
|
|
|
|
CPU_AND(&other_cpus, &pmap->pm_active);
|
|
|
|
|
if (!CPU_EMPTY(&other_cpus))
|
|
|
|
|
smp_masked_invlpg_range(other_cpus, sva, eva);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
sched_unpin();
|
|
|
|
|
PT_UPDATES_FLUSH();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
pmap_invalidate_all(pmap_t pmap)
|
|
|
|
|
{
|
2011-06-28 13:13:06 +00:00
|
|
|
|
cpuset_t other_cpus;
|
|
|
|
|
u_int cpuid;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
|
|
|
|
CTR1(KTR_PMAP, "pmap_invalidate_page: pmap=%p", pmap);
|
|
|
|
|
|
|
|
|
|
sched_pin();
|
Commit the support for removing cpumask_t and replacing it directly with
cpuset_t objects.
That is going to offer the underlying support for a simple bump of
MAXCPU and then support for number of cpus > 32 (as it is today).
Right now, cpumask_t is an int, 32 bits on all our supported architecture.
cpumask_t on the other side is implemented as an array of longs, and
easilly extendible by definition.
The architectures touched by this commit are the following:
- amd64
- i386
- pc98
- arm
- ia64
- XEN
while the others are still missing.
Userland is believed to be fully converted with the changes contained
here.
Some technical notes:
- This commit may be considered an ABI nop for all the architectures
different from amd64 and ia64 (and sparc64 in the future)
- per-cpu members, which are now converted to cpuset_t, needs to be
accessed avoiding migration, because the size of cpuset_t should be
considered unknown
- size of cpuset_t objects is different from kernel and userland (this is
primirally done in order to leave some more space in userland to cope
with KBI extensions). If you need to access kernel cpuset_t from the
userland please refer to example in this patch on how to do that
correctly (kgdb may be a good source, for example).
- Support for other architectures is going to be added soon
- Only MAXCPU for amd64 is bumped now
The patch has been tested by sbruno and Nicholas Esborn on opteron
4 x 12 pack CPUs. More testing on big SMP is expected to came soon.
pluknet tested the patch with his 8-ways on both amd64 and i386.
Tested by: pluknet, sbruno, gianni, Nicholas Esborn
Reviewed by: jeff, jhb, sbruno
2011-05-05 14:39:14 +00:00
|
|
|
|
if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
|
2008-08-12 19:48:18 +00:00
|
|
|
|
invltlb();
|
|
|
|
|
smp_invltlb();
|
|
|
|
|
} else {
|
2011-06-28 13:13:06 +00:00
|
|
|
|
cpuid = PCPU_GET(cpuid);
|
2011-06-22 20:04:39 +00:00
|
|
|
|
other_cpus = all_cpus;
|
2011-06-28 13:13:06 +00:00
|
|
|
|
CPU_CLR(cpuid, &other_cpus);
|
|
|
|
|
if (CPU_ISSET(cpuid, &pmap->pm_active))
|
2008-08-12 19:48:18 +00:00
|
|
|
|
invltlb();
|
Commit the support for removing cpumask_t and replacing it directly with
cpuset_t objects.
That is going to offer the underlying support for a simple bump of
MAXCPU and then support for number of cpus > 32 (as it is today).
Right now, cpumask_t is an int, 32 bits on all our supported architecture.
cpumask_t on the other side is implemented as an array of longs, and
easilly extendible by definition.
The architectures touched by this commit are the following:
- amd64
- i386
- pc98
- arm
- ia64
- XEN
while the others are still missing.
Userland is believed to be fully converted with the changes contained
here.
Some technical notes:
- This commit may be considered an ABI nop for all the architectures
different from amd64 and ia64 (and sparc64 in the future)
- per-cpu members, which are now converted to cpuset_t, needs to be
accessed avoiding migration, because the size of cpuset_t should be
considered unknown
- size of cpuset_t objects is different from kernel and userland (this is
primirally done in order to leave some more space in userland to cope
with KBI extensions). If you need to access kernel cpuset_t from the
userland please refer to example in this patch on how to do that
correctly (kgdb may be a good source, for example).
- Support for other architectures is going to be added soon
- Only MAXCPU for amd64 is bumped now
The patch has been tested by sbruno and Nicholas Esborn on opteron
4 x 12 pack CPUs. More testing on big SMP is expected to came soon.
pluknet tested the patch with his 8-ways on both amd64 and i386.
Tested by: pluknet, sbruno, gianni, Nicholas Esborn
Reviewed by: jeff, jhb, sbruno
2011-05-05 14:39:14 +00:00
|
|
|
|
CPU_AND(&other_cpus, &pmap->pm_active);
|
|
|
|
|
if (!CPU_EMPTY(&other_cpus))
|
|
|
|
|
smp_masked_invltlb(other_cpus);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
sched_unpin();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Flush the cache on this CPU with wbinvd() and then call
 * smp_cache_flush() for the SMP side, with the thread pinned to its CPU
 * for the duration.
 */
void
pmap_invalidate_cache(void)
{

	sched_pin();
	wbinvd();		/* local CPU */
	smp_cache_flush();	/* SMP counterpart */
	sched_unpin();
}
|
|
|
|
|
#else /* !SMP */
|
|
|
|
|
/*
|
|
|
|
|
* Normal, non-SMP, 486+ invalidation functions.
|
|
|
|
|
* We inline these within pmap.c for speed.
|
|
|
|
|
*/
|
|
|
|
|
PMAP_INLINE void
|
|
|
|
|
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
|
|
|
|
|
{
|
|
|
|
|
CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
|
|
|
|
|
pmap, va);
|
|
|
|
|
|
Commit the support for removing cpumask_t and replacing it directly with
cpuset_t objects.
That is going to offer the underlying support for a simple bump of
MAXCPU and then support for number of cpus > 32 (as it is today).
Right now, cpumask_t is an int, 32 bits on all our supported architecture.
cpumask_t on the other side is implemented as an array of longs, and
easilly extendible by definition.
The architectures touched by this commit are the following:
- amd64
- i386
- pc98
- arm
- ia64
- XEN
while the others are still missing.
Userland is believed to be fully converted with the changes contained
here.
Some technical notes:
- This commit may be considered an ABI nop for all the architectures
different from amd64 and ia64 (and sparc64 in the future)
- per-cpu members, which are now converted to cpuset_t, needs to be
accessed avoiding migration, because the size of cpuset_t should be
considered unknown
- size of cpuset_t objects is different from kernel and userland (this is
primirally done in order to leave some more space in userland to cope
with KBI extensions). If you need to access kernel cpuset_t from the
userland please refer to example in this patch on how to do that
correctly (kgdb may be a good source, for example).
- Support for other architectures is going to be added soon
- Only MAXCPU for amd64 is bumped now
The patch has been tested by sbruno and Nicholas Esborn on opteron
4 x 12 pack CPUs. More testing on big SMP is expected to came soon.
pluknet tested the patch with his 8-ways on both amd64 and i386.
Tested by: pluknet, sbruno, gianni, Nicholas Esborn
Reviewed by: jeff, jhb, sbruno
2011-05-05 14:39:14 +00:00
|
|
|
|
if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
|
2008-08-12 19:48:18 +00:00
|
|
|
|
invlpg(va);
|
|
|
|
|
PT_UPDATES_FLUSH();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
PMAP_INLINE void
|
|
|
|
|
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
|
|
|
|
|
{
|
|
|
|
|
vm_offset_t addr;
|
|
|
|
|
|
|
|
|
|
if (eva - sva > PAGE_SIZE)
|
|
|
|
|
CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
|
|
|
|
|
pmap, sva, eva);
|
|
|
|
|
|
Commit the support for removing cpumask_t and replacing it directly with
cpuset_t objects.
That is going to offer the underlying support for a simple bump of
MAXCPU and then support for number of cpus > 32 (as it is today).
Right now, cpumask_t is an int, 32 bits on all our supported architecture.
cpumask_t on the other side is implemented as an array of longs, and
easilly extendible by definition.
The architectures touched by this commit are the following:
- amd64
- i386
- pc98
- arm
- ia64
- XEN
while the others are still missing.
Userland is believed to be fully converted with the changes contained
here.
Some technical notes:
- This commit may be considered an ABI nop for all the architectures
different from amd64 and ia64 (and sparc64 in the future)
- per-cpu members, which are now converted to cpuset_t, needs to be
accessed avoiding migration, because the size of cpuset_t should be
considered unknown
- size of cpuset_t objects is different from kernel and userland (this is
primirally done in order to leave some more space in userland to cope
with KBI extensions). If you need to access kernel cpuset_t from the
userland please refer to example in this patch on how to do that
correctly (kgdb may be a good source, for example).
- Support for other architectures is going to be added soon
- Only MAXCPU for amd64 is bumped now
The patch has been tested by sbruno and Nicholas Esborn on opteron
4 x 12 pack CPUs. More testing on big SMP is expected to came soon.
pluknet tested the patch with his 8-ways on both amd64 and i386.
Tested by: pluknet, sbruno, gianni, Nicholas Esborn
Reviewed by: jeff, jhb, sbruno
2011-05-05 14:39:14 +00:00
|
|
|
|
if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
|
2008-08-12 19:48:18 +00:00
|
|
|
|
for (addr = sva; addr < eva; addr += PAGE_SIZE)
|
|
|
|
|
invlpg(addr);
|
|
|
|
|
PT_UPDATES_FLUSH();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
PMAP_INLINE void
|
|
|
|
|
pmap_invalidate_all(pmap_t pmap)
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);
|
|
|
|
|
|
Commit the support for removing cpumask_t and replacing it directly with
cpuset_t objects.
That is going to offer the underlying support for a simple bump of
MAXCPU and then support for number of cpus > 32 (as it is today).
Right now, cpumask_t is an int, 32 bits on all our supported architecture.
cpumask_t on the other side is implemented as an array of longs, and
easilly extendible by definition.
The architectures touched by this commit are the following:
- amd64
- i386
- pc98
- arm
- ia64
- XEN
while the others are still missing.
Userland is believed to be fully converted with the changes contained
here.
Some technical notes:
- This commit may be considered an ABI nop for all the architectures
different from amd64 and ia64 (and sparc64 in the future)
- per-cpu members, which are now converted to cpuset_t, needs to be
accessed avoiding migration, because the size of cpuset_t should be
considered unknown
- size of cpuset_t objects is different from kernel and userland (this is
primirally done in order to leave some more space in userland to cope
with KBI extensions). If you need to access kernel cpuset_t from the
userland please refer to example in this patch on how to do that
correctly (kgdb may be a good source, for example).
- Support for other architectures is going to be added soon
- Only MAXCPU for amd64 is bumped now
The patch has been tested by sbruno and Nicholas Esborn on opteron
4 x 12 pack CPUs. More testing on big SMP is expected to came soon.
pluknet tested the patch with his 8-ways on both amd64 and i386.
Tested by: pluknet, sbruno, gianni, Nicholas Esborn
Reviewed by: jeff, jhb, sbruno
2011-05-05 14:39:14 +00:00
|
|
|
|
if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
|
2008-08-12 19:48:18 +00:00
|
|
|
|
invltlb();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Non-SMP variant: write back and invalidate the cache with a single
 * wbinvd().
 */
PMAP_INLINE void
pmap_invalidate_cache(void)
{

	wbinvd();
}
|
|
|
|
|
#endif /* !SMP */
|
|
|
|
|
|
2009-07-29 19:38:33 +00:00
|
|
|
|
void
|
|
|
|
|
pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
KASSERT((sva & PAGE_MASK) == 0,
|
|
|
|
|
("pmap_invalidate_cache_range: sva not page-aligned"));
|
|
|
|
|
KASSERT((eva & PAGE_MASK) == 0,
|
|
|
|
|
("pmap_invalidate_cache_range: eva not page-aligned"));
|
|
|
|
|
|
|
|
|
|
if (cpu_feature & CPUID_SS)
|
|
|
|
|
; /* If "Self Snoop" is supported, do nothing. */
|
|
|
|
|
else if (cpu_feature & CPUID_CLFSH) {
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Otherwise, do per-cache line flush. Use the mfence
|
|
|
|
|
* instruction to insure that previous stores are
|
|
|
|
|
* included in the write-back. The processor
|
|
|
|
|
* propagates flush to other processors in the cache
|
|
|
|
|
* coherence domain.
|
|
|
|
|
*/
|
|
|
|
|
mfence();
|
2009-09-09 19:40:54 +00:00
|
|
|
|
for (; sva < eva; sva += cpu_clflush_line_size)
|
|
|
|
|
clflush(sva);
|
2009-07-29 19:38:33 +00:00
|
|
|
|
mfence();
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* No targeted cache flush methods are supported by CPU,
|
|
|
|
|
* globally invalidate cache as a last resort.
|
|
|
|
|
*/
|
|
|
|
|
pmap_invalidate_cache();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
/*
|
|
|
|
|
* Are we current address space or kernel? N.B. We return FALSE when
|
|
|
|
|
* a pmap's page table is in use because a kernel thread is borrowing
|
|
|
|
|
* it. The borrowed page table can change spontaneously, making any
|
|
|
|
|
* dependence on its continued use subject to a race condition.
|
|
|
|
|
*/
|
|
|
|
|
static __inline int
|
|
|
|
|
pmap_is_current(pmap_t pmap)
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
return (pmap == kernel_pmap ||
|
|
|
|
|
(pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
|
|
|
|
|
(pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If the given pmap is not the current or kernel pmap, the returned pte must
|
|
|
|
|
* be released by passing it to pmap_pte_release().
|
|
|
|
|
*/
|
|
|
|
|
pt_entry_t *
|
|
|
|
|
pmap_pte(pmap_t pmap, vm_offset_t va)
|
|
|
|
|
{
|
|
|
|
|
pd_entry_t newpf;
|
|
|
|
|
pd_entry_t *pde;
|
|
|
|
|
|
|
|
|
|
pde = pmap_pde(pmap, va);
|
|
|
|
|
if (*pde & PG_PS)
|
|
|
|
|
return (pde);
|
|
|
|
|
if (*pde != 0) {
|
|
|
|
|
/* are we current address space or kernel? */
|
|
|
|
|
if (pmap_is_current(pmap))
|
|
|
|
|
return (vtopte(va));
|
|
|
|
|
mtx_lock(&PMAP2mutex);
|
|
|
|
|
newpf = *pde & PG_FRAME;
|
|
|
|
|
if ((*PMAP2 & PG_FRAME) != newpf) {
|
2010-02-21 01:13:34 +00:00
|
|
|
|
vm_page_lock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
PT_SET_MA(PADDR2, newpf | PG_V | PG_A | PG_M);
|
2010-02-21 01:13:34 +00:00
|
|
|
|
vm_page_unlock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
CTR3(KTR_PMAP, "pmap_pte: pmap=%p va=0x%x newpte=0x%08x",
|
|
|
|
|
pmap, va, (*PMAP2 & 0xffffffff));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
|
|
|
|
|
}
|
|
|
|
|
return (0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Releases a pte that was obtained from pmap_pte(). Be prepared for the pte
|
|
|
|
|
* being NULL.
|
|
|
|
|
*/
|
|
|
|
|
static __inline void
|
|
|
|
|
pmap_pte_release(pt_entry_t *pte)
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) {
|
|
|
|
|
CTR1(KTR_PMAP, "pmap_pte_release: pte=0x%jx",
|
|
|
|
|
*PMAP2);
|
2010-12-26 13:05:43 +00:00
|
|
|
|
vm_page_lock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
PT_SET_VA(PMAP2, 0, TRUE);
|
2010-12-26 13:05:43 +00:00
|
|
|
|
vm_page_unlock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
mtx_unlock(&PMAP2mutex);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static __inline void
|
|
|
|
|
invlcaddr(void *caddr)
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
invlpg((u_int)caddr);
|
|
|
|
|
PT_UPDATES_FLUSH();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Super fast pmap_pte routine best used when scanning
|
|
|
|
|
* the pv lists. This eliminates many coarse-grained
|
|
|
|
|
* invltlb calls. Note that many of the pv list
|
|
|
|
|
* scans are across different pmaps. It is very wasteful
|
|
|
|
|
* to do an entire invltlb for checking a single mapping.
|
|
|
|
|
*
|
|
|
|
|
* If the given pmap is not the current pmap, vm_page_queue_mtx
|
|
|
|
|
* must be held and curthread pinned to a CPU.
|
|
|
|
|
*/
|
|
|
|
|
static pt_entry_t *
|
|
|
|
|
pmap_pte_quick(pmap_t pmap, vm_offset_t va)
|
|
|
|
|
{
|
|
|
|
|
pd_entry_t newpf;
|
|
|
|
|
pd_entry_t *pde;
|
|
|
|
|
|
|
|
|
|
pde = pmap_pde(pmap, va);
|
|
|
|
|
if (*pde & PG_PS)
|
|
|
|
|
return (pde);
|
|
|
|
|
if (*pde != 0) {
|
|
|
|
|
/* are we current address space or kernel? */
|
|
|
|
|
if (pmap_is_current(pmap))
|
|
|
|
|
return (vtopte(va));
|
|
|
|
|
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
|
|
|
|
|
KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
|
|
|
|
|
newpf = *pde & PG_FRAME;
|
|
|
|
|
if ((*PMAP1 & PG_FRAME) != newpf) {
|
|
|
|
|
PT_SET_MA(PADDR1, newpf | PG_V | PG_A | PG_M);
|
|
|
|
|
CTR3(KTR_PMAP, "pmap_pte_quick: pmap=%p va=0x%x newpte=0x%08x",
|
|
|
|
|
pmap, va, (u_long)*PMAP1);
|
|
|
|
|
|
|
|
|
|
#ifdef SMP
|
|
|
|
|
PMAP1cpu = PCPU_GET(cpuid);
|
|
|
|
|
#endif
|
|
|
|
|
PMAP1changed++;
|
|
|
|
|
} else
|
|
|
|
|
#ifdef SMP
|
|
|
|
|
if (PMAP1cpu != PCPU_GET(cpuid)) {
|
|
|
|
|
PMAP1cpu = PCPU_GET(cpuid);
|
|
|
|
|
invlcaddr(PADDR1);
|
|
|
|
|
PMAP1changedcpu++;
|
|
|
|
|
} else
|
|
|
|
|
#endif
|
|
|
|
|
PMAP1unchanged++;
|
|
|
|
|
return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
|
|
|
|
|
}
|
|
|
|
|
return (0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Routine: pmap_extract
|
|
|
|
|
* Function:
|
|
|
|
|
* Extract the physical page address associated
|
|
|
|
|
* with the given map/virtual_address pair.
|
|
|
|
|
*/
|
|
|
|
|
vm_paddr_t
|
|
|
|
|
pmap_extract(pmap_t pmap, vm_offset_t va)
|
|
|
|
|
{
|
|
|
|
|
vm_paddr_t rtval;
|
|
|
|
|
pt_entry_t *pte;
|
|
|
|
|
pd_entry_t pde;
|
|
|
|
|
pt_entry_t pteval;
|
|
|
|
|
|
|
|
|
|
rtval = 0;
|
|
|
|
|
PMAP_LOCK(pmap);
|
|
|
|
|
pde = pmap->pm_pdir[va >> PDRSHIFT];
|
|
|
|
|
if (pde != 0) {
|
|
|
|
|
if ((pde & PG_PS) != 0) {
|
|
|
|
|
rtval = xpmap_mtop(pde & PG_PS_FRAME) | (va & PDRMASK);
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
return rtval;
|
|
|
|
|
}
|
|
|
|
|
pte = pmap_pte(pmap, va);
|
|
|
|
|
pteval = *pte ? xpmap_mtop(*pte) : 0;
|
|
|
|
|
rtval = (pteval & PG_FRAME) | (va & PAGE_MASK);
|
|
|
|
|
pmap_pte_release(pte);
|
|
|
|
|
}
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
return (rtval);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Routine: pmap_extract_ma
|
|
|
|
|
* Function:
|
|
|
|
|
* Like pmap_extract, but returns machine address
|
|
|
|
|
*/
|
|
|
|
|
vm_paddr_t
|
|
|
|
|
pmap_extract_ma(pmap_t pmap, vm_offset_t va)
|
|
|
|
|
{
|
|
|
|
|
vm_paddr_t rtval;
|
|
|
|
|
pt_entry_t *pte;
|
|
|
|
|
pd_entry_t pde;
|
|
|
|
|
|
|
|
|
|
rtval = 0;
|
|
|
|
|
PMAP_LOCK(pmap);
|
|
|
|
|
pde = pmap->pm_pdir[va >> PDRSHIFT];
|
|
|
|
|
if (pde != 0) {
|
|
|
|
|
if ((pde & PG_PS) != 0) {
|
|
|
|
|
rtval = (pde & ~PDRMASK) | (va & PDRMASK);
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
return rtval;
|
|
|
|
|
}
|
|
|
|
|
pte = pmap_pte(pmap, va);
|
|
|
|
|
rtval = (*pte & PG_FRAME) | (va & PAGE_MASK);
|
|
|
|
|
pmap_pte_release(pte);
|
|
|
|
|
}
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
return (rtval);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Routine: pmap_extract_and_hold
|
|
|
|
|
* Function:
|
|
|
|
|
* Atomically extract and hold the physical page
|
|
|
|
|
* with the given pmap and virtual address pair
|
|
|
|
|
* if that mapping permits the given protection.
|
|
|
|
|
*/
|
|
|
|
|
vm_page_t
|
|
|
|
|
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
|
|
|
|
|
{
|
|
|
|
|
pd_entry_t pde;
|
|
|
|
|
pt_entry_t pte;
|
|
|
|
|
vm_page_t m;
|
2010-04-30 00:46:43 +00:00
|
|
|
|
vm_paddr_t pa;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
2010-04-30 00:46:43 +00:00
|
|
|
|
pa = 0;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
m = NULL;
|
|
|
|
|
PMAP_LOCK(pmap);
|
2010-04-30 00:46:43 +00:00
|
|
|
|
retry:
|
2008-08-12 19:48:18 +00:00
|
|
|
|
pde = PT_GET(pmap_pde(pmap, va));
|
|
|
|
|
if (pde != 0) {
|
|
|
|
|
if (pde & PG_PS) {
|
|
|
|
|
if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
|
2010-04-30 00:46:43 +00:00
|
|
|
|
if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) |
|
|
|
|
|
(va & PDRMASK), &pa))
|
|
|
|
|
goto retry;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
|
|
|
|
|
(va & PDRMASK));
|
|
|
|
|
vm_page_hold(m);
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
sched_pin();
|
|
|
|
|
pte = PT_GET(pmap_pte_quick(pmap, va));
|
|
|
|
|
if (*PMAP1)
|
|
|
|
|
PT_SET_MA(PADDR1, 0);
|
|
|
|
|
if ((pte & PG_V) &&
|
|
|
|
|
((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
|
2010-04-30 00:46:43 +00:00
|
|
|
|
if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa))
|
|
|
|
|
goto retry;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
|
|
|
|
|
vm_page_hold(m);
|
|
|
|
|
}
|
|
|
|
|
sched_unpin();
|
|
|
|
|
}
|
|
|
|
|
}
|
2010-04-30 00:46:43 +00:00
|
|
|
|
PA_UNLOCK_COND(pa);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
return (m);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/***************************************************
|
|
|
|
|
* Low level mapping routines.....
|
|
|
|
|
***************************************************/
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Add a wired page to the kva.
|
|
|
|
|
* Note: not SMP coherent.
|
|
|
|
|
*/
|
2008-08-15 04:00:44 +00:00
|
|
|
|
void
|
2008-08-12 19:48:18 +00:00
|
|
|
|
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
|
|
|
|
|
{
|
|
|
|
|
PT_SET_MA(va, xpmap_ptom(pa)| PG_RW | PG_V | pgeflag);
|
|
|
|
|
}
|
|
|
|
|
|
2008-08-15 04:00:44 +00:00
|
|
|
|
void
|
2008-08-12 19:48:18 +00:00
|
|
|
|
pmap_kenter_ma(vm_offset_t va, vm_paddr_t ma)
|
|
|
|
|
{
|
|
|
|
|
pt_entry_t *pte;
|
|
|
|
|
|
|
|
|
|
pte = vtopte(va);
|
|
|
|
|
pte_store_ma(pte, ma | PG_RW | PG_V | pgeflag);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2008-08-15 04:00:44 +00:00
|
|
|
|
static __inline void
|
2008-08-12 19:48:18 +00:00
|
|
|
|
pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
|
|
|
|
|
{
|
|
|
|
|
PT_SET_MA(va, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Remove a page from the kernel pagetables.
|
|
|
|
|
* Note: not SMP coherent.
|
|
|
|
|
*/
|
|
|
|
|
PMAP_INLINE void
|
|
|
|
|
pmap_kremove(vm_offset_t va)
|
|
|
|
|
{
|
|
|
|
|
pt_entry_t *pte;
|
|
|
|
|
|
|
|
|
|
pte = vtopte(va);
|
|
|
|
|
PT_CLEAR_VA(pte, FALSE);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Used to map a range of physical addresses into kernel
|
|
|
|
|
* virtual address space.
|
|
|
|
|
*
|
|
|
|
|
* The value passed in '*virt' is a suggested virtual address for
|
|
|
|
|
* the mapping. Architectures which can support a direct-mapped
|
|
|
|
|
* physical to virtual region can return the appropriate address
|
|
|
|
|
* within that region, leaving '*virt' unchanged. Other
|
|
|
|
|
* architectures should map the pages starting at '*virt' and
|
|
|
|
|
* update '*virt' with the first usable address after the mapped
|
|
|
|
|
* region.
|
|
|
|
|
*/
|
|
|
|
|
vm_offset_t
|
|
|
|
|
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
|
|
|
|
|
{
|
|
|
|
|
vm_offset_t va, sva;
|
|
|
|
|
|
|
|
|
|
va = sva = *virt;
|
|
|
|
|
CTR4(KTR_PMAP, "pmap_map: va=0x%x start=0x%jx end=0x%jx prot=0x%x",
|
|
|
|
|
va, start, end, prot);
|
|
|
|
|
while (start < end) {
|
|
|
|
|
pmap_kenter(va, start);
|
|
|
|
|
va += PAGE_SIZE;
|
|
|
|
|
start += PAGE_SIZE;
|
|
|
|
|
}
|
|
|
|
|
pmap_invalidate_range(kernel_pmap, sva, va);
|
|
|
|
|
*virt = va;
|
|
|
|
|
return (sva);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Add a list of wired pages to the kva
|
|
|
|
|
* this routine is only used for temporary
|
|
|
|
|
* kernel mappings that do not need to have
|
|
|
|
|
* page modification or references recorded.
|
|
|
|
|
* Note that old mappings are simply written
|
|
|
|
|
* over. The page *must* be wired.
|
|
|
|
|
* Note: SMP coherent. Uses a ranged shootdown IPI.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
|
|
|
|
|
{
|
|
|
|
|
pt_entry_t *endpte, *pte;
|
|
|
|
|
vm_paddr_t pa;
|
|
|
|
|
vm_offset_t va = sva;
|
|
|
|
|
int mclcount = 0;
|
|
|
|
|
multicall_entry_t mcl[16];
|
|
|
|
|
multicall_entry_t *mclp = mcl;
|
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
CTR2(KTR_PMAP, "pmap_qenter:sva=0x%x count=%d", va, count);
|
|
|
|
|
pte = vtopte(sva);
|
|
|
|
|
endpte = pte + count;
|
|
|
|
|
while (pte < endpte) {
|
2010-11-20 20:04:29 +00:00
|
|
|
|
pa = VM_PAGE_TO_MACH(*ma) | pgeflag | PG_RW | PG_V | PG_M | PG_A;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
|
|
|
|
mclp->op = __HYPERVISOR_update_va_mapping;
|
|
|
|
|
mclp->args[0] = va;
|
|
|
|
|
mclp->args[1] = (uint32_t)(pa & 0xffffffff);
|
|
|
|
|
mclp->args[2] = (uint32_t)(pa >> 32);
|
|
|
|
|
mclp->args[3] = (*pte & PG_V) ? UVMF_INVLPG|UVMF_ALL : 0;
|
|
|
|
|
|
|
|
|
|
va += PAGE_SIZE;
|
|
|
|
|
pte++;
|
|
|
|
|
ma++;
|
|
|
|
|
mclp++;
|
|
|
|
|
mclcount++;
|
|
|
|
|
if (mclcount == 16) {
|
|
|
|
|
error = HYPERVISOR_multicall(mcl, mclcount);
|
|
|
|
|
mclp = mcl;
|
|
|
|
|
mclcount = 0;
|
|
|
|
|
KASSERT(error == 0, ("bad multicall %d", error));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (mclcount) {
|
|
|
|
|
error = HYPERVISOR_multicall(mcl, mclcount);
|
|
|
|
|
KASSERT(error == 0, ("bad multicall %d", error));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifdef INVARIANTS
|
|
|
|
|
for (pte = vtopte(sva), mclcount = 0; mclcount < count; mclcount++, pte++)
|
|
|
|
|
KASSERT(*pte, ("pte not set for va=0x%x", sva + mclcount*PAGE_SIZE));
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* This routine tears out page mappings from the
|
|
|
|
|
* kernel -- it is meant only for temporary mappings.
|
|
|
|
|
* Note: SMP coherent. Uses a ranged shootdown IPI.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_qremove(vm_offset_t sva, int count)
|
|
|
|
|
{
|
|
|
|
|
vm_offset_t va;
|
|
|
|
|
|
|
|
|
|
CTR2(KTR_PMAP, "pmap_qremove: sva=0x%x count=%d", sva, count);
|
|
|
|
|
va = sva;
|
|
|
|
|
vm_page_lock_queues();
|
|
|
|
|
critical_enter();
|
|
|
|
|
while (count-- > 0) {
|
|
|
|
|
pmap_kremove(va);
|
|
|
|
|
va += PAGE_SIZE;
|
|
|
|
|
}
|
2010-11-25 22:06:07 +00:00
|
|
|
|
PT_UPDATES_FLUSH();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
pmap_invalidate_range(kernel_pmap, sva, va);
|
|
|
|
|
critical_exit();
|
|
|
|
|
vm_page_unlock_queues();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/***************************************************
|
|
|
|
|
* Page table page management routines.....
|
|
|
|
|
***************************************************/
|
|
|
|
|
static __inline void
|
|
|
|
|
pmap_free_zero_pages(vm_page_t free)
|
|
|
|
|
{
|
|
|
|
|
vm_page_t m;
|
|
|
|
|
|
|
|
|
|
while (free != NULL) {
|
|
|
|
|
m = free;
|
|
|
|
|
free = m->right;
|
|
|
|
|
vm_page_free_zero(m);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* This routine unholds page table pages, and if the hold count
|
|
|
|
|
* drops to zero, then it decrements the wire count.
|
|
|
|
|
*/
|
|
|
|
|
static __inline int
|
|
|
|
|
pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free)
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
--m->wire_count;
|
|
|
|
|
if (m->wire_count == 0)
|
|
|
|
|
return _pmap_unwire_pte_hold(pmap, m, free);
|
|
|
|
|
else
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free)
|
|
|
|
|
{
|
|
|
|
|
vm_offset_t pteva;
|
|
|
|
|
|
|
|
|
|
PT_UPDATES_FLUSH();
|
|
|
|
|
/*
|
|
|
|
|
* unmap the page table page
|
|
|
|
|
*/
|
|
|
|
|
xen_pt_unpin(pmap->pm_pdir[m->pindex]);
|
|
|
|
|
/*
|
|
|
|
|
* page *might* contain residual mapping :-/
|
|
|
|
|
*/
|
|
|
|
|
PD_CLEAR_VA(pmap, m->pindex, TRUE);
|
|
|
|
|
pmap_zero_page(m);
|
|
|
|
|
--pmap->pm_stats.resident_count;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* This is a release store so that the ordinary store unmapping
|
|
|
|
|
* the page table page is globally performed before TLB shoot-
|
|
|
|
|
* down is begun.
|
|
|
|
|
*/
|
|
|
|
|
atomic_subtract_rel_int(&cnt.v_wire_count, 1);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Do an invltlb to make the invalidated mapping
|
|
|
|
|
* take effect immediately.
|
|
|
|
|
*/
|
|
|
|
|
pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
|
|
|
|
|
pmap_invalidate_page(pmap, pteva);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Put page on a list so that it is released after
|
|
|
|
|
* *ALL* TLB shootdown is done
|
|
|
|
|
*/
|
|
|
|
|
m->right = *free;
|
|
|
|
|
*free = m;
|
|
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* After removing a page table entry, this routine is used to
|
|
|
|
|
* conditionally free the page, and manage the hold/wire counts.
|
|
|
|
|
*/
|
|
|
|
|
static int
|
|
|
|
|
pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free)
|
|
|
|
|
{
|
|
|
|
|
pd_entry_t ptepde;
|
|
|
|
|
vm_page_t mpte;
|
|
|
|
|
|
|
|
|
|
if (va >= VM_MAXUSER_ADDRESS)
|
|
|
|
|
return 0;
|
|
|
|
|
ptepde = PT_GET(pmap_pde(pmap, va));
|
|
|
|
|
mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
|
|
|
|
|
return pmap_unwire_pte_hold(pmap, mpte, free);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
pmap_pinit0(pmap_t pmap)
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
PMAP_LOCK_INIT(pmap);
|
|
|
|
|
pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
|
|
|
|
|
#ifdef PAE
|
|
|
|
|
pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
|
|
|
|
|
#endif
|
Commit the support for removing cpumask_t and replacing it directly with
cpuset_t objects.
That is going to offer the underlying support for a simple bump of
MAXCPU and then support for number of cpus > 32 (as it is today).
Right now, cpumask_t is an int, 32 bits on all our supported architecture.
cpumask_t on the other side is implemented as an array of longs, and
easilly extendible by definition.
The architectures touched by this commit are the following:
- amd64
- i386
- pc98
- arm
- ia64
- XEN
while the others are still missing.
Userland is believed to be fully converted with the changes contained
here.
Some technical notes:
- This commit may be considered an ABI nop for all the architectures
different from amd64 and ia64 (and sparc64 in the future)
- per-cpu members, which are now converted to cpuset_t, needs to be
accessed avoiding migration, because the size of cpuset_t should be
considered unknown
- size of cpuset_t objects is different from kernel and userland (this is
primirally done in order to leave some more space in userland to cope
with KBI extensions). If you need to access kernel cpuset_t from the
userland please refer to example in this patch on how to do that
correctly (kgdb may be a good source, for example).
- Support for other architectures is going to be added soon
- Only MAXCPU for amd64 is bumped now
The patch has been tested by sbruno and Nicholas Esborn on opteron
4 x 12 pack CPUs. More testing on big SMP is expected to came soon.
pluknet tested the patch with his 8-ways on both amd64 and i386.
Tested by: pluknet, sbruno, gianni, Nicholas Esborn
Reviewed by: jeff, jhb, sbruno
2011-05-05 14:39:14 +00:00
|
|
|
|
CPU_ZERO(&pmap->pm_active);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
PCPU_SET(curpmap, pmap);
|
|
|
|
|
TAILQ_INIT(&pmap->pm_pvchunk);
|
|
|
|
|
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
|
|
|
|
|
mtx_lock_spin(&allpmaps_lock);
|
|
|
|
|
LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
|
|
|
|
|
mtx_unlock_spin(&allpmaps_lock);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Initialize a preallocated and zeroed pmap structure,
|
|
|
|
|
* such as one in a vmspace structure.
|
|
|
|
|
*/
|
|
|
|
|
int
|
|
|
|
|
pmap_pinit(pmap_t pmap)
|
|
|
|
|
{
|
|
|
|
|
vm_page_t m, ptdpg[NPGPTD + 1];
|
|
|
|
|
int npgptd = NPGPTD + 1;
|
|
|
|
|
static int color;
|
|
|
|
|
int i;
|
|
|
|
|
|
2011-01-04 15:55:15 +00:00
|
|
|
|
#ifdef HAMFISTED_LOCKING
|
|
|
|
|
mtx_lock(&createdelete_lock);
|
|
|
|
|
#endif
|
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
PMAP_LOCK_INIT(pmap);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* No need to allocate page table space yet but we do need a valid
|
|
|
|
|
* page directory table.
|
|
|
|
|
*/
|
|
|
|
|
if (pmap->pm_pdir == NULL) {
|
|
|
|
|
pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
|
|
|
|
|
NBPTD);
|
|
|
|
|
if (pmap->pm_pdir == NULL) {
|
|
|
|
|
PMAP_LOCK_DESTROY(pmap);
|
2011-01-04 15:55:15 +00:00
|
|
|
|
#ifdef HAMFISTED_LOCKING
|
|
|
|
|
mtx_unlock(&createdelete_lock);
|
|
|
|
|
#endif
|
2008-08-12 19:48:18 +00:00
|
|
|
|
return (0);
|
|
|
|
|
}
|
2010-11-20 21:36:12 +00:00
|
|
|
|
#ifdef PAE
|
2008-08-12 19:48:18 +00:00
|
|
|
|
pmap->pm_pdpt = (pd_entry_t *)kmem_alloc_nofault(kernel_map, 1);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* allocate the page directory page(s)
|
|
|
|
|
*/
|
|
|
|
|
for (i = 0; i < npgptd;) {
|
|
|
|
|
m = vm_page_alloc(NULL, color++,
|
|
|
|
|
VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
|
|
|
|
|
VM_ALLOC_ZERO);
|
|
|
|
|
if (m == NULL)
|
|
|
|
|
VM_WAIT;
|
|
|
|
|
else {
|
|
|
|
|
ptdpg[i++] = m;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
|
|
|
|
|
for (i = 0; i < NPGPTD; i++) {
|
|
|
|
|
if ((ptdpg[i]->flags & PG_ZERO) == 0)
|
|
|
|
|
pagezero(&pmap->pm_pdir[i*NPTEPG]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
mtx_lock_spin(&allpmaps_lock);
|
|
|
|
|
LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
|
|
|
|
|
mtx_unlock_spin(&allpmaps_lock);
|
|
|
|
|
/* Wire in kernel global address entries. */
|
|
|
|
|
|
|
|
|
|
bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
|
|
|
|
|
#ifdef PAE
|
|
|
|
|
pmap_qenter((vm_offset_t)pmap->pm_pdpt, &ptdpg[NPGPTD], 1);
|
|
|
|
|
if ((ptdpg[NPGPTD]->flags & PG_ZERO) == 0)
|
|
|
|
|
bzero(pmap->pm_pdpt, PAGE_SIZE);
|
|
|
|
|
for (i = 0; i < NPGPTD; i++) {
|
|
|
|
|
vm_paddr_t ma;
|
|
|
|
|
|
2010-11-20 20:04:29 +00:00
|
|
|
|
ma = VM_PAGE_TO_MACH(ptdpg[i]);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
pmap->pm_pdpt[i] = ma | PG_V;
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
for (i = 0; i < NPGPTD; i++) {
|
|
|
|
|
pt_entry_t *pd;
|
|
|
|
|
vm_paddr_t ma;
|
|
|
|
|
|
2010-11-20 20:04:29 +00:00
|
|
|
|
ma = VM_PAGE_TO_MACH(ptdpg[i]);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
pd = pmap->pm_pdir + (i * NPDEPG);
|
|
|
|
|
PT_SET_MA(pd, *vtopte((vm_offset_t)pd) & ~(PG_M|PG_A|PG_U|PG_RW));
|
|
|
|
|
#if 0
|
|
|
|
|
xen_pgd_pin(ma);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifdef PAE
|
|
|
|
|
PT_SET_MA(pmap->pm_pdpt, *vtopte((vm_offset_t)pmap->pm_pdpt) & ~PG_RW);
|
|
|
|
|
#endif
|
|
|
|
|
vm_page_lock_queues();
|
|
|
|
|
xen_flush_queue();
|
2010-11-20 20:04:29 +00:00
|
|
|
|
xen_pgdpt_pin(VM_PAGE_TO_MACH(ptdpg[NPGPTD]));
|
2008-08-12 19:48:18 +00:00
|
|
|
|
for (i = 0; i < NPGPTD; i++) {
|
2010-11-20 20:04:29 +00:00
|
|
|
|
vm_paddr_t ma = VM_PAGE_TO_MACH(ptdpg[i]);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
PT_SET_VA_MA(&pmap->pm_pdir[PTDPTDI + i], ma | PG_V | PG_A, FALSE);
|
|
|
|
|
}
|
|
|
|
|
xen_flush_queue();
|
|
|
|
|
vm_page_unlock_queues();
|
Commit the support for removing cpumask_t and replacing it directly with
cpuset_t objects.
That is going to offer the underlying support for a simple bump of
MAXCPU and then support for number of cpus > 32 (as it is today).
Right now, cpumask_t is an int, 32 bits on all our supported architecture.
cpumask_t on the other side is implemented as an array of longs, and
easilly extendible by definition.
The architectures touched by this commit are the following:
- amd64
- i386
- pc98
- arm
- ia64
- XEN
while the others are still missing.
Userland is believed to be fully converted with the changes contained
here.
Some technical notes:
- This commit may be considered an ABI nop for all the architectures
different from amd64 and ia64 (and sparc64 in the future)
- per-cpu members, which are now converted to cpuset_t, needs to be
accessed avoiding migration, because the size of cpuset_t should be
considered unknown
- size of cpuset_t objects is different from kernel and userland (this is
primirally done in order to leave some more space in userland to cope
with KBI extensions). If you need to access kernel cpuset_t from the
userland please refer to example in this patch on how to do that
correctly (kgdb may be a good source, for example).
- Support for other architectures is going to be added soon
- Only MAXCPU for amd64 is bumped now
The patch has been tested by sbruno and Nicholas Esborn on opteron
4 x 12 pack CPUs. More testing on big SMP is expected to came soon.
pluknet tested the patch with his 8-ways on both amd64 and i386.
Tested by: pluknet, sbruno, gianni, Nicholas Esborn
Reviewed by: jeff, jhb, sbruno
2011-05-05 14:39:14 +00:00
|
|
|
|
CPU_ZERO(&pmap->pm_active);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
TAILQ_INIT(&pmap->pm_pvchunk);
|
|
|
|
|
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
|
|
|
|
|
|
2011-01-04 15:55:15 +00:00
|
|
|
|
#ifdef HAMFISTED_LOCKING
|
|
|
|
|
mtx_unlock(&createdelete_lock);
|
|
|
|
|
#endif
|
2008-08-12 19:48:18 +00:00
|
|
|
|
return (1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* this routine is called if the page table page is not
|
|
|
|
|
* mapped correctly.
|
|
|
|
|
*/
|
|
|
|
|
static vm_page_t
|
|
|
|
|
_pmap_allocpte(pmap_t pmap, unsigned int ptepindex, int flags)
|
|
|
|
|
{
|
|
|
|
|
vm_paddr_t ptema;
|
|
|
|
|
vm_page_t m;
|
|
|
|
|
|
|
|
|
|
KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
|
|
|
|
|
(flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
|
|
|
|
|
("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Allocate a page table page.
|
|
|
|
|
*/
|
|
|
|
|
if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
|
|
|
|
|
VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
|
|
|
|
|
if (flags & M_WAITOK) {
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
vm_page_unlock_queues();
|
|
|
|
|
VM_WAIT;
|
|
|
|
|
vm_page_lock_queues();
|
|
|
|
|
PMAP_LOCK(pmap);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Indicate the need to retry. While waiting, the page table
|
|
|
|
|
* page may have been allocated.
|
|
|
|
|
*/
|
|
|
|
|
return (NULL);
|
|
|
|
|
}
|
|
|
|
|
if ((m->flags & PG_ZERO) == 0)
|
|
|
|
|
pmap_zero_page(m);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Map the pagetable page into the process address space, if
|
|
|
|
|
* it isn't already there.
|
|
|
|
|
*/
|
|
|
|
|
pmap->pm_stats.resident_count++;
|
|
|
|
|
|
2010-11-20 20:04:29 +00:00
|
|
|
|
ptema = VM_PAGE_TO_MACH(m);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
xen_pt_pin(ptema);
|
|
|
|
|
PT_SET_VA_MA(&pmap->pm_pdir[ptepindex],
|
|
|
|
|
(ptema | PG_U | PG_RW | PG_V | PG_A | PG_M), TRUE);
|
|
|
|
|
|
|
|
|
|
KASSERT(pmap->pm_pdir[ptepindex],
|
|
|
|
|
("_pmap_allocpte: ptepindex=%d did not get mapped", ptepindex));
|
|
|
|
|
return (m);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static vm_page_t
|
|
|
|
|
pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
|
|
|
|
|
{
|
|
|
|
|
unsigned ptepindex;
|
|
|
|
|
pd_entry_t ptema;
|
|
|
|
|
vm_page_t m;
|
|
|
|
|
|
|
|
|
|
KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
|
|
|
|
|
(flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
|
|
|
|
|
("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Calculate pagetable page index
|
|
|
|
|
*/
|
|
|
|
|
ptepindex = va >> PDRSHIFT;
|
|
|
|
|
retry:
|
|
|
|
|
/*
|
|
|
|
|
* Get the page directory entry
|
|
|
|
|
*/
|
|
|
|
|
ptema = pmap->pm_pdir[ptepindex];
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* This supports switching from a 4MB page to a
|
|
|
|
|
* normal 4K page.
|
|
|
|
|
*/
|
|
|
|
|
if (ptema & PG_PS) {
|
|
|
|
|
/*
|
|
|
|
|
* XXX
|
|
|
|
|
*/
|
|
|
|
|
pmap->pm_pdir[ptepindex] = 0;
|
|
|
|
|
ptema = 0;
|
|
|
|
|
pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
|
|
|
|
|
pmap_invalidate_all(kernel_pmap);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If the page table page is mapped, we just increment the
|
|
|
|
|
* hold count, and activate it.
|
|
|
|
|
*/
|
|
|
|
|
if (ptema & PG_V) {
|
|
|
|
|
m = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME);
|
|
|
|
|
m->wire_count++;
|
|
|
|
|
} else {
|
|
|
|
|
/*
|
|
|
|
|
* Here if the pte page isn't mapped, or if it has
|
|
|
|
|
* been deallocated.
|
|
|
|
|
*/
|
|
|
|
|
CTR3(KTR_PMAP, "pmap_allocpte: pmap=%p va=0x%08x flags=0x%x",
|
|
|
|
|
pmap, va, flags);
|
|
|
|
|
m = _pmap_allocpte(pmap, ptepindex, flags);
|
|
|
|
|
if (m == NULL && (flags & M_WAITOK))
|
|
|
|
|
goto retry;
|
|
|
|
|
|
|
|
|
|
KASSERT(pmap->pm_pdir[ptepindex], ("ptepindex=%d did not get mapped", ptepindex));
|
|
|
|
|
}
|
|
|
|
|
return (m);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/***************************************************
|
|
|
|
|
* Pmap allocation/deallocation routines.
|
|
|
|
|
***************************************************/
|
|
|
|
|
|
2011-05-20 14:53:16 +00:00
|
|
|
|
#ifdef SMP
|
|
|
|
|
/*
|
|
|
|
|
* Deal with a SMP shootdown of other users of the pmap that we are
|
|
|
|
|
* trying to dispose of. This can be a bit hairy.
|
|
|
|
|
*/
|
|
|
|
|
static cpuset_t *lazymask;
|
|
|
|
|
static u_int lazyptd;
|
|
|
|
|
static volatile u_int lazywait;
|
|
|
|
|
|
|
|
|
|
void pmap_lazyfix_action(void);
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
pmap_lazyfix_action(void)
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
#ifdef COUNT_IPIS
|
|
|
|
|
(*ipi_lazypmap_counts[PCPU_GET(cpuid)])++;
|
|
|
|
|
#endif
|
|
|
|
|
if (rcr3() == lazyptd)
|
|
|
|
|
load_cr3(PCPU_GET(curpcb)->pcb_cr3);
|
|
|
|
|
CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask);
|
|
|
|
|
atomic_store_rel_int(&lazywait, 1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
2011-06-28 13:13:06 +00:00
|
|
|
|
pmap_lazyfix_self(u_int cpuid)
|
2011-05-20 14:53:16 +00:00
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
if (rcr3() == lazyptd)
|
|
|
|
|
load_cr3(PCPU_GET(curpcb)->pcb_cr3);
|
2011-06-28 13:13:06 +00:00
|
|
|
|
CPU_CLR_ATOMIC(cpuid, lazymask);
|
2011-05-20 14:53:16 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
pmap_lazyfix(pmap_t pmap)
|
|
|
|
|
{
|
|
|
|
|
cpuset_t mymask, mask;
|
2011-06-28 13:13:06 +00:00
|
|
|
|
u_int cpuid, spins;
|
2011-05-20 14:53:16 +00:00
|
|
|
|
int lsb;
|
|
|
|
|
|
|
|
|
|
mask = pmap->pm_active;
|
|
|
|
|
while (!CPU_EMPTY(&mask)) {
|
|
|
|
|
spins = 50000000;
|
|
|
|
|
|
|
|
|
|
/* Find least significant set bit. */
|
|
|
|
|
lsb = cpusetobj_ffs(&mask);
|
2011-05-22 20:24:36 +00:00
|
|
|
|
MPASS(lsb != 0);
|
2011-05-20 14:53:16 +00:00
|
|
|
|
lsb--;
|
|
|
|
|
CPU_SETOF(lsb, &mask);
|
|
|
|
|
mtx_lock_spin(&smp_ipi_mtx);
|
|
|
|
|
#ifdef PAE
|
|
|
|
|
lazyptd = vtophys(pmap->pm_pdpt);
|
|
|
|
|
#else
|
|
|
|
|
lazyptd = vtophys(pmap->pm_pdir);
|
|
|
|
|
#endif
|
2011-06-28 13:13:06 +00:00
|
|
|
|
cpuid = PCPU_GET(cpuid);
|
|
|
|
|
|
|
|
|
|
/* Use a cpuset just for having an easy check. */
|
|
|
|
|
CPU_SETOF(cpuid, &mymask);
|
2011-05-24 15:47:40 +00:00
|
|
|
|
if (!CPU_CMP(&mask, &mymask)) {
|
2011-05-20 14:53:16 +00:00
|
|
|
|
lazymask = &pmap->pm_active;
|
2011-06-28 13:13:06 +00:00
|
|
|
|
pmap_lazyfix_self(cpuid);
|
2011-05-20 14:53:16 +00:00
|
|
|
|
} else {
|
|
|
|
|
atomic_store_rel_int((u_int *)&lazymask,
|
|
|
|
|
(u_int)&pmap->pm_active);
|
|
|
|
|
atomic_store_rel_int(&lazywait, 0);
|
|
|
|
|
ipi_selected(mask, IPI_LAZYPMAP);
|
|
|
|
|
while (lazywait == 0) {
|
|
|
|
|
ia32_pause();
|
|
|
|
|
if (--spins == 0)
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
mtx_unlock_spin(&smp_ipi_mtx);
|
|
|
|
|
if (spins == 0)
|
|
|
|
|
printf("pmap_lazyfix: spun for 50000000\n");
|
|
|
|
|
mask = pmap->pm_active;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#else /* SMP */
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Cleaning up on uniprocessor is easy. For various reasons, we're
|
|
|
|
|
* unlikely to have to even execute this code, including the fact
|
|
|
|
|
* that the cleanup is deferred until the parent does a wait(2), which
|
|
|
|
|
* means that another userland process has run.
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
pmap_lazyfix(pmap_t pmap)
|
|
|
|
|
{
|
|
|
|
|
u_int cr3;
|
|
|
|
|
|
|
|
|
|
cr3 = vtophys(pmap->pm_pdir);
|
|
|
|
|
if (cr3 == rcr3()) {
|
|
|
|
|
load_cr3(PCPU_GET(curpcb)->pcb_cr3);
|
|
|
|
|
CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#endif /* SMP */
|
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
/*
|
|
|
|
|
* Release any resources held by the given physical map.
|
|
|
|
|
* Called when a pmap initialized by pmap_pinit is being released.
|
|
|
|
|
* Should only be called if the map contains no valid mappings.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_release(pmap_t pmap)
|
|
|
|
|
{
|
|
|
|
|
vm_page_t m, ptdpg[2*NPGPTD+1];
|
|
|
|
|
vm_paddr_t ma;
|
|
|
|
|
int i;
|
|
|
|
|
#ifdef PAE
|
|
|
|
|
int npgptd = NPGPTD + 1;
|
|
|
|
|
#else
|
|
|
|
|
int npgptd = NPGPTD;
|
|
|
|
|
#endif
|
|
|
|
|
KASSERT(pmap->pm_stats.resident_count == 0,
|
|
|
|
|
("pmap_release: pmap resident count %ld != 0",
|
|
|
|
|
pmap->pm_stats.resident_count));
|
|
|
|
|
PT_UPDATES_FLUSH();
|
|
|
|
|
|
2011-01-04 15:55:15 +00:00
|
|
|
|
#ifdef HAMFISTED_LOCKING
|
|
|
|
|
mtx_lock(&createdelete_lock);
|
|
|
|
|
#endif
|
|
|
|
|
|
2011-05-20 14:53:16 +00:00
|
|
|
|
pmap_lazyfix(pmap);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
mtx_lock_spin(&allpmaps_lock);
|
|
|
|
|
LIST_REMOVE(pmap, pm_list);
|
|
|
|
|
mtx_unlock_spin(&allpmaps_lock);
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < NPGPTD; i++)
|
|
|
|
|
ptdpg[i] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdir + (i*NPDEPG)) & PG_FRAME);
|
|
|
|
|
pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
|
2010-11-20 21:36:12 +00:00
|
|
|
|
#ifdef PAE
|
2008-08-12 19:48:18 +00:00
|
|
|
|
ptdpg[NPGPTD] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdpt));
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < npgptd; i++) {
|
|
|
|
|
m = ptdpg[i];
|
2010-11-20 20:04:29 +00:00
|
|
|
|
ma = VM_PAGE_TO_MACH(m);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
/* unpinning L1 and L2 treated the same */
|
2010-11-19 15:12:19 +00:00
|
|
|
|
#if 0
|
2008-08-12 19:48:18 +00:00
|
|
|
|
xen_pgd_unpin(ma);
|
2010-11-19 15:12:19 +00:00
|
|
|
|
#else
|
|
|
|
|
if (i == NPGPTD)
|
|
|
|
|
xen_pgd_unpin(ma);
|
|
|
|
|
#endif
|
2008-08-12 19:48:18 +00:00
|
|
|
|
#ifdef PAE
|
2010-11-18 21:02:40 +00:00
|
|
|
|
if (i < NPGPTD)
|
2010-11-20 20:04:29 +00:00
|
|
|
|
KASSERT(VM_PAGE_TO_MACH(m) == (pmap->pm_pdpt[i] & PG_FRAME),
|
2010-11-18 21:02:40 +00:00
|
|
|
|
("pmap_release: got wrong ptd page"));
|
2008-08-12 19:48:18 +00:00
|
|
|
|
#endif
|
|
|
|
|
m->wire_count--;
|
|
|
|
|
atomic_subtract_int(&cnt.v_wire_count, 1);
|
|
|
|
|
vm_page_free(m);
|
|
|
|
|
}
|
2010-11-18 21:29:43 +00:00
|
|
|
|
#ifdef PAE
|
|
|
|
|
pmap_qremove((vm_offset_t)pmap->pm_pdpt, 1);
|
|
|
|
|
#endif
|
2008-08-12 19:48:18 +00:00
|
|
|
|
PMAP_LOCK_DESTROY(pmap);
|
2011-01-04 15:55:15 +00:00
|
|
|
|
|
|
|
|
|
#ifdef HAMFISTED_LOCKING
|
|
|
|
|
mtx_unlock(&createdelete_lock);
|
|
|
|
|
#endif
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
kvm_size(SYSCTL_HANDLER_ARGS)
|
|
|
|
|
{
|
|
|
|
|
unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;
|
|
|
|
|
|
|
|
|
|
return sysctl_handle_long(oidp, &ksize, 0, req);
|
|
|
|
|
}
|
|
|
|
|
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
|
|
|
|
|
0, 0, kvm_size, "IU", "Size of KVM");
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
kvm_free(SYSCTL_HANDLER_ARGS)
|
|
|
|
|
{
|
|
|
|
|
unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
|
|
|
|
|
|
|
|
|
|
return sysctl_handle_long(oidp, &kfree, 0, req);
|
|
|
|
|
}
|
|
|
|
|
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
|
|
|
|
|
0, 0, kvm_free, "IU", "Amount of KVM free");
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* grow the number of kernel page table entries, if needed
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_growkernel(vm_offset_t addr)
|
|
|
|
|
{
|
|
|
|
|
struct pmap *pmap;
|
|
|
|
|
vm_paddr_t ptppaddr;
|
|
|
|
|
vm_page_t nkpg;
|
|
|
|
|
pd_entry_t newpdir;
|
|
|
|
|
|
|
|
|
|
mtx_assert(&kernel_map->system_mtx, MA_OWNED);
|
|
|
|
|
if (kernel_vm_end == 0) {
|
|
|
|
|
kernel_vm_end = KERNBASE;
|
|
|
|
|
nkpt = 0;
|
|
|
|
|
while (pdir_pde(PTD, kernel_vm_end)) {
|
|
|
|
|
kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
|
|
|
|
|
nkpt++;
|
|
|
|
|
if (kernel_vm_end - 1 >= kernel_map->max_offset) {
|
|
|
|
|
kernel_vm_end = kernel_map->max_offset;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
addr = roundup2(addr, PAGE_SIZE * NPTEPG);
|
|
|
|
|
if (addr - 1 >= kernel_map->max_offset)
|
|
|
|
|
addr = kernel_map->max_offset;
|
|
|
|
|
while (kernel_vm_end < addr) {
|
|
|
|
|
if (pdir_pde(PTD, kernel_vm_end)) {
|
|
|
|
|
kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
|
|
|
|
|
if (kernel_vm_end - 1 >= kernel_map->max_offset) {
|
|
|
|
|
kernel_vm_end = kernel_map->max_offset;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* This index is bogus, but out of the way
|
|
|
|
|
*/
|
|
|
|
|
nkpg = vm_page_alloc(NULL, nkpt,
|
|
|
|
|
VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
|
|
|
|
|
if (!nkpg)
|
|
|
|
|
panic("pmap_growkernel: no memory to grow kernel");
|
|
|
|
|
|
|
|
|
|
nkpt++;
|
|
|
|
|
|
|
|
|
|
pmap_zero_page(nkpg);
|
|
|
|
|
ptppaddr = VM_PAGE_TO_PHYS(nkpg);
|
|
|
|
|
newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
|
2008-08-21 02:57:02 +00:00
|
|
|
|
vm_page_lock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
PD_SET_VA(kernel_pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE);
|
|
|
|
|
mtx_lock_spin(&allpmaps_lock);
|
|
|
|
|
LIST_FOREACH(pmap, &allpmaps, pm_list)
|
|
|
|
|
PD_SET_VA(pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE);
|
|
|
|
|
|
|
|
|
|
mtx_unlock_spin(&allpmaps_lock);
|
2008-08-21 02:57:02 +00:00
|
|
|
|
vm_page_unlock_queues();
|
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
|
|
|
|
|
if (kernel_vm_end - 1 >= kernel_map->max_offset) {
|
|
|
|
|
kernel_vm_end = kernel_map->max_offset;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/***************************************************
|
|
|
|
|
* page management routines.
|
|
|
|
|
***************************************************/
|
|
|
|
|
|
|
|
|
|
CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
|
|
|
|
|
CTASSERT(_NPCM == 11);
|
|
|
|
|
|
|
|
|
|
static __inline struct pv_chunk *
|
|
|
|
|
pv_to_chunk(pv_entry_t pv)
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
return (struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* The pmap that owns the chunk a pv entry lives in. */
#define	PV_PMAP(pv)	(pv_to_chunk(pv)->pc_pmap)

#define	PC_FREE0_9	0xfffffffful	/* Free values for index 0 through 9 */
#define	PC_FREE10	0x0000fffful	/* Free values for index 10 */

/*
 * Bitmap value of a completely free pv chunk, one word per pc_map[]
 * index: words 0 through 9 have all 32 bits set, word 10 only the low 16.
 * The table is only ever read, hence const.  Its length matches _NPCM
 * (the file asserts _NPCM == 11).
 */
static const uint32_t pc_freemask[11] = {
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE10
};
|
|
|
|
|
|
|
|
|
|
/* vm.pmap.pv_entry_count is always exported. */
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD,
    &pv_entry_count, 0,
    "Current number of pv entries");

#ifdef PV_STATS
/* Optional pv chunk statistics, exported read-only under vm.pmap. */
static int pc_chunk_count;
static int pc_chunk_allocs;
static int pc_chunk_frees;
static int pc_chunk_tryfail;

SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD,
    &pc_chunk_count, 0,
    "Current number of pv entry chunks");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD,
    &pc_chunk_allocs, 0,
    "Current number of pv entry chunks allocated");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD,
    &pc_chunk_frees, 0,
    "Current number of pv entry chunks frees");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD,
    &pc_chunk_tryfail, 0,
    "Number of times tried to get a chunk page but failed.");

/* Optional pv entry statistics. */
static long pv_entry_frees;
static long pv_entry_allocs;
static int pv_entry_spare;

SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD,
    &pv_entry_frees, 0,
    "Current number of pv entry frees");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD,
    &pv_entry_allocs, 0,
    "Current number of pv entry allocs");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD,
    &pv_entry_spare, 0,
    "Current number of spare pv entries");

/* How often pmap_collect() was run against each page queue. */
static int pmap_collect_inactive;
static int pmap_collect_active;

SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD,
    &pmap_collect_inactive, 0,
    "Current number times pmap_collect called on inactive queue");
SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD,
    &pmap_collect_active, 0,
    "Current number times pmap_collect called on active queue");
#endif
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* We are in a serious low memory condition. Resort to
|
|
|
|
|
* drastic measures to free some pages so we can allocate
|
|
|
|
|
* another pv entry chunk. This is normally called to
|
|
|
|
|
* unmap inactive pages, and if necessary, active pages.
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
|
|
|
|
|
{
|
|
|
|
|
pmap_t pmap;
|
|
|
|
|
pt_entry_t *pte, tpte;
|
|
|
|
|
pv_entry_t next_pv, pv;
|
|
|
|
|
vm_offset_t va;
|
|
|
|
|
vm_page_t m, free;
|
|
|
|
|
|
|
|
|
|
sched_pin();
|
|
|
|
|
TAILQ_FOREACH(m, &vpq->pl, pageq) {
|
2011-07-02 23:42:04 +00:00
|
|
|
|
if ((m->flags & PG_MARKER) != 0 || m->hold_count || m->busy)
|
2008-08-12 19:48:18 +00:00
|
|
|
|
continue;
|
|
|
|
|
TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
|
|
|
|
|
va = pv->pv_va;
|
|
|
|
|
pmap = PV_PMAP(pv);
|
|
|
|
|
/* Avoid deadlock and lock recursion. */
|
|
|
|
|
if (pmap > locked_pmap)
|
|
|
|
|
PMAP_LOCK(pmap);
|
|
|
|
|
else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
|
|
|
|
|
continue;
|
|
|
|
|
pmap->pm_stats.resident_count--;
|
|
|
|
|
pte = pmap_pte_quick(pmap, va);
|
|
|
|
|
tpte = pte_load_clear(pte);
|
|
|
|
|
KASSERT((tpte & PG_W) == 0,
|
|
|
|
|
("pmap_collect: wired pte %#jx", (uintmax_t)tpte));
|
|
|
|
|
if (tpte & PG_A)
|
|
|
|
|
vm_page_flag_set(m, PG_REFERENCED);
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
|
2008-08-12 19:48:18 +00:00
|
|
|
|
vm_page_dirty(m);
|
|
|
|
|
free = NULL;
|
|
|
|
|
pmap_unuse_pt(pmap, va, &free);
|
|
|
|
|
pmap_invalidate_page(pmap, va);
|
|
|
|
|
pmap_free_zero_pages(free);
|
|
|
|
|
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
|
|
|
|
|
free_pv_entry(pmap, pv);
|
|
|
|
|
if (pmap != locked_pmap)
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
}
|
2010-05-30 18:48:41 +00:00
|
|
|
|
if (TAILQ_EMPTY(&m->md.pv_list))
|
|
|
|
|
vm_page_flag_clear(m, PG_WRITEABLE);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
sched_unpin();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* free the pv_entry back to the free list
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
free_pv_entry(pmap_t pmap, pv_entry_t pv)
|
|
|
|
|
{
|
|
|
|
|
vm_page_t m;
|
|
|
|
|
struct pv_chunk *pc;
|
|
|
|
|
int idx, field, bit;
|
|
|
|
|
|
|
|
|
|
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
|
|
|
|
|
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
|
|
|
|
|
PV_STAT(pv_entry_frees++);
|
|
|
|
|
PV_STAT(pv_entry_spare++);
|
|
|
|
|
pv_entry_count--;
|
|
|
|
|
pc = pv_to_chunk(pv);
|
|
|
|
|
idx = pv - &pc->pc_pventry[0];
|
|
|
|
|
field = idx / 32;
|
|
|
|
|
bit = idx % 32;
|
|
|
|
|
pc->pc_map[field] |= 1ul << bit;
|
|
|
|
|
/* move to head of list */
|
|
|
|
|
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
|
|
|
|
|
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
|
|
|
|
|
for (idx = 0; idx < _NPCM; idx++)
|
|
|
|
|
if (pc->pc_map[idx] != pc_freemask[idx])
|
|
|
|
|
return;
|
|
|
|
|
PV_STAT(pv_entry_spare -= _NPCPV);
|
|
|
|
|
PV_STAT(pc_chunk_count--);
|
|
|
|
|
PV_STAT(pc_chunk_frees++);
|
|
|
|
|
/* entire chunk is free, return it */
|
|
|
|
|
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
|
|
|
|
|
m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
|
|
|
|
|
pmap_qremove((vm_offset_t)pc, 1);
|
|
|
|
|
vm_page_unwire(m, 0);
|
|
|
|
|
vm_page_free(m);
|
|
|
|
|
pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* get a new pv_entry, allocating a block from the system
|
|
|
|
|
* when needed.
|
|
|
|
|
*/
|
|
|
|
|
static pv_entry_t
|
|
|
|
|
get_pv_entry(pmap_t pmap, int try)
|
|
|
|
|
{
|
|
|
|
|
static const struct timeval printinterval = { 60, 0 };
|
|
|
|
|
static struct timeval lastprint;
|
|
|
|
|
static vm_pindex_t colour;
|
|
|
|
|
struct vpgqueues *pq;
|
|
|
|
|
int bit, field;
|
|
|
|
|
pv_entry_t pv;
|
|
|
|
|
struct pv_chunk *pc;
|
|
|
|
|
vm_page_t m;
|
|
|
|
|
|
|
|
|
|
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
|
|
|
|
|
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
|
|
|
|
|
PV_STAT(pv_entry_allocs++);
|
|
|
|
|
pv_entry_count++;
|
|
|
|
|
if (pv_entry_count > pv_entry_high_water)
|
|
|
|
|
if (ratecheck(&lastprint, &printinterval))
|
|
|
|
|
printf("Approaching the limit on PV entries, consider "
|
|
|
|
|
"increasing either the vm.pmap.shpgperproc or the "
|
|
|
|
|
"vm.pmap.pv_entry_max tunable.\n");
|
|
|
|
|
pq = NULL;
|
|
|
|
|
retry:
|
|
|
|
|
pc = TAILQ_FIRST(&pmap->pm_pvchunk);
|
|
|
|
|
if (pc != NULL) {
|
|
|
|
|
for (field = 0; field < _NPCM; field++) {
|
|
|
|
|
if (pc->pc_map[field]) {
|
|
|
|
|
bit = bsfl(pc->pc_map[field]);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (field < _NPCM) {
|
|
|
|
|
pv = &pc->pc_pventry[field * 32 + bit];
|
|
|
|
|
pc->pc_map[field] &= ~(1ul << bit);
|
|
|
|
|
/* If this was the last item, move it to tail */
|
|
|
|
|
for (field = 0; field < _NPCM; field++)
|
|
|
|
|
if (pc->pc_map[field] != 0) {
|
|
|
|
|
PV_STAT(pv_entry_spare--);
|
|
|
|
|
return (pv); /* not full, return */
|
|
|
|
|
}
|
|
|
|
|
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
|
|
|
|
|
TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
|
|
|
|
|
PV_STAT(pv_entry_spare--);
|
|
|
|
|
return (pv);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
/*
|
|
|
|
|
* Access to the ptelist "pv_vafree" is synchronized by the page
|
|
|
|
|
* queues lock. If "pv_vafree" is currently non-empty, it will
|
|
|
|
|
* remain non-empty until pmap_ptelist_alloc() completes.
|
|
|
|
|
*/
|
|
|
|
|
if (pv_vafree == 0 || (m = vm_page_alloc(NULL, colour, (pq ==
|
|
|
|
|
&vm_page_queues[PQ_ACTIVE] ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) |
|
|
|
|
|
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
|
|
|
|
|
if (try) {
|
|
|
|
|
pv_entry_count--;
|
|
|
|
|
PV_STAT(pc_chunk_tryfail++);
|
|
|
|
|
return (NULL);
|
|
|
|
|
}
|
|
|
|
|
/*
|
|
|
|
|
* Reclaim pv entries: At first, destroy mappings to
|
|
|
|
|
* inactive pages. After that, if a pv chunk entry
|
|
|
|
|
* is still needed, destroy mappings to active pages.
|
|
|
|
|
*/
|
|
|
|
|
if (pq == NULL) {
|
|
|
|
|
PV_STAT(pmap_collect_inactive++);
|
|
|
|
|
pq = &vm_page_queues[PQ_INACTIVE];
|
|
|
|
|
} else if (pq == &vm_page_queues[PQ_INACTIVE]) {
|
|
|
|
|
PV_STAT(pmap_collect_active++);
|
|
|
|
|
pq = &vm_page_queues[PQ_ACTIVE];
|
|
|
|
|
} else
|
|
|
|
|
panic("get_pv_entry: increase vm.pmap.shpgperproc");
|
|
|
|
|
pmap_collect(pmap, pq);
|
|
|
|
|
goto retry;
|
|
|
|
|
}
|
|
|
|
|
PV_STAT(pc_chunk_count++);
|
|
|
|
|
PV_STAT(pc_chunk_allocs++);
|
|
|
|
|
colour++;
|
|
|
|
|
pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree);
|
|
|
|
|
pmap_qenter((vm_offset_t)pc, &m, 1);
|
|
|
|
|
if ((m->flags & PG_ZERO) == 0)
|
|
|
|
|
pagezero(pc);
|
|
|
|
|
pc->pc_pmap = pmap;
|
|
|
|
|
pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */
|
|
|
|
|
for (field = 1; field < _NPCM; field++)
|
|
|
|
|
pc->pc_map[field] = pc_freemask[field];
|
|
|
|
|
pv = &pc->pc_pventry[0];
|
|
|
|
|
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
|
|
|
|
|
PV_STAT(pv_entry_spare += _NPCPV - 1);
|
|
|
|
|
return (pv);
|
|
|
|
|
}
|
|
|
|
|
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
static __inline pv_entry_t
|
|
|
|
|
pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
|
2008-08-12 19:48:18 +00:00
|
|
|
|
{
|
|
|
|
|
pv_entry_t pv;
|
|
|
|
|
|
|
|
|
|
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
|
|
|
|
|
if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
|
|
|
|
|
TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
break;
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
}
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
return (pv);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
|
2008-08-12 19:48:18 +00:00
|
|
|
|
{
|
|
|
|
|
pv_entry_t pv;
|
|
|
|
|
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
pv = pmap_pvh_remove(pvh, pmap, va);
|
|
|
|
|
KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
|
|
|
|
|
free_pv_entry(pmap, pv);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
|
|
|
|
|
{
|
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
pmap_pvh_free(&m->md, pmap, va);
|
|
|
|
|
if (TAILQ_EMPTY(&m->md.pv_list))
|
|
|
|
|
vm_page_flag_clear(m, PG_WRITEABLE);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Conditionally create a pv entry.
|
|
|
|
|
*/
|
|
|
|
|
static boolean_t
|
|
|
|
|
pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
|
|
|
|
|
{
|
|
|
|
|
pv_entry_t pv;
|
|
|
|
|
|
|
|
|
|
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
|
|
|
|
|
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
|
|
|
|
|
if (pv_entry_count < pv_entry_high_water &&
|
|
|
|
|
(pv = get_pv_entry(pmap, TRUE)) != NULL) {
|
|
|
|
|
pv->pv_va = va;
|
|
|
|
|
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
|
|
|
|
|
return (TRUE);
|
|
|
|
|
} else
|
|
|
|
|
return (FALSE);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* pmap_remove_pte: do the things to unmap a page in a process
|
|
|
|
|
*/
|
|
|
|
|
static int
|
|
|
|
|
pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free)
|
|
|
|
|
{
|
|
|
|
|
pt_entry_t oldpte;
|
|
|
|
|
vm_page_t m;
|
|
|
|
|
|
|
|
|
|
CTR3(KTR_PMAP, "pmap_remove_pte: pmap=%p *ptq=0x%x va=0x%x",
|
|
|
|
|
pmap, (u_long)*ptq, va);
|
|
|
|
|
|
|
|
|
|
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
|
|
|
|
|
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
|
|
|
|
|
oldpte = *ptq;
|
|
|
|
|
PT_SET_VA_MA(ptq, 0, TRUE);
|
|
|
|
|
if (oldpte & PG_W)
|
|
|
|
|
pmap->pm_stats.wired_count -= 1;
|
|
|
|
|
/*
|
|
|
|
|
* Machines that don't support invlpg, also don't support
|
|
|
|
|
* PG_G.
|
|
|
|
|
*/
|
|
|
|
|
if (oldpte & PG_G)
|
|
|
|
|
pmap_invalidate_page(kernel_pmap, va);
|
|
|
|
|
pmap->pm_stats.resident_count -= 1;
|
2010-12-28 14:36:32 +00:00
|
|
|
|
if (oldpte & PG_MANAGED) {
|
2008-08-12 19:48:18 +00:00
|
|
|
|
m = PHYS_TO_VM_PAGE(xpmap_mtop(oldpte) & PG_FRAME);
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
|
2008-08-12 19:48:18 +00:00
|
|
|
|
vm_page_dirty(m);
|
|
|
|
|
if (oldpte & PG_A)
|
|
|
|
|
vm_page_flag_set(m, PG_REFERENCED);
|
|
|
|
|
pmap_remove_entry(pmap, m, va);
|
2010-12-28 14:36:32 +00:00
|
|
|
|
}
|
2008-08-12 19:48:18 +00:00
|
|
|
|
return (pmap_unuse_pt(pmap, va, free));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Remove a single page from a process address space
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
pmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free)
|
|
|
|
|
{
|
|
|
|
|
pt_entry_t *pte;
|
|
|
|
|
|
|
|
|
|
CTR2(KTR_PMAP, "pmap_remove_page: pmap=%p va=0x%x",
|
|
|
|
|
pmap, va);
|
|
|
|
|
|
|
|
|
|
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
|
|
|
|
|
KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
|
|
|
|
|
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
|
|
|
|
|
if ((pte = pmap_pte_quick(pmap, va)) == NULL || (*pte & PG_V) == 0)
|
|
|
|
|
return;
|
|
|
|
|
pmap_remove_pte(pmap, pte, va, free);
|
|
|
|
|
pmap_invalidate_page(pmap, va);
|
|
|
|
|
if (*PMAP1)
|
|
|
|
|
PT_SET_MA(PADDR1, 0);
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Remove the given range of addresses from the specified map.
|
|
|
|
|
*
|
|
|
|
|
* It is assumed that the start and end are properly
|
|
|
|
|
* rounded to the page size.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
|
|
|
|
|
{
|
|
|
|
|
vm_offset_t pdnxt;
|
|
|
|
|
pd_entry_t ptpaddr;
|
|
|
|
|
pt_entry_t *pte;
|
|
|
|
|
vm_page_t free = NULL;
|
|
|
|
|
int anyvalid;
|
|
|
|
|
|
|
|
|
|
CTR3(KTR_PMAP, "pmap_remove: pmap=%p sva=0x%x eva=0x%x",
|
|
|
|
|
pmap, sva, eva);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Perform an unsynchronized read. This is, however, safe.
|
|
|
|
|
*/
|
|
|
|
|
if (pmap->pm_stats.resident_count == 0)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
anyvalid = 0;
|
|
|
|
|
|
|
|
|
|
vm_page_lock_queues();
|
|
|
|
|
sched_pin();
|
|
|
|
|
PMAP_LOCK(pmap);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* special handling of removing one page. a very
|
|
|
|
|
* common operation and easy to short circuit some
|
|
|
|
|
* code.
|
|
|
|
|
*/
|
|
|
|
|
if ((sva + PAGE_SIZE == eva) &&
|
|
|
|
|
((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
|
|
|
|
|
pmap_remove_page(pmap, sva, &free);
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (; sva < eva; sva = pdnxt) {
|
|
|
|
|
unsigned pdirindex;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Calculate index for next page table.
|
|
|
|
|
*/
|
|
|
|
|
pdnxt = (sva + NBPDR) & ~PDRMASK;
|
|
|
|
|
if (pmap->pm_stats.resident_count == 0)
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
pdirindex = sva >> PDRSHIFT;
|
|
|
|
|
ptpaddr = pmap->pm_pdir[pdirindex];
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Weed out invalid mappings. Note: we assume that the page
|
|
|
|
|
* directory table is always allocated, and in kernel virtual.
|
|
|
|
|
*/
|
|
|
|
|
if (ptpaddr == 0)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Check for large page.
|
|
|
|
|
*/
|
|
|
|
|
if ((ptpaddr & PG_PS) != 0) {
|
|
|
|
|
PD_CLEAR_VA(pmap, pdirindex, TRUE);
|
|
|
|
|
pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
|
|
|
|
|
anyvalid = 1;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Limit our scan to either the end of the va represented
|
|
|
|
|
* by the current page table page, or to the end of the
|
|
|
|
|
* range being removed.
|
|
|
|
|
*/
|
|
|
|
|
if (pdnxt > eva)
|
|
|
|
|
pdnxt = eva;
|
|
|
|
|
|
|
|
|
|
for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
|
|
|
|
|
sva += PAGE_SIZE) {
|
|
|
|
|
if ((*pte & PG_V) == 0)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* The TLB entry for a PG_G mapping is invalidated
|
|
|
|
|
* by pmap_remove_pte().
|
|
|
|
|
*/
|
|
|
|
|
if ((*pte & PG_G) == 0)
|
|
|
|
|
anyvalid = 1;
|
|
|
|
|
if (pmap_remove_pte(pmap, pte, sva, &free))
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
PT_UPDATES_FLUSH();
|
|
|
|
|
if (*PMAP1)
|
|
|
|
|
PT_SET_VA_MA(PMAP1, 0, TRUE);
|
|
|
|
|
out:
|
|
|
|
|
if (anyvalid)
|
|
|
|
|
pmap_invalidate_all(pmap);
|
|
|
|
|
sched_unpin();
|
|
|
|
|
vm_page_unlock_queues();
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
pmap_free_zero_pages(free);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Routine: pmap_remove_all
|
|
|
|
|
* Function:
|
|
|
|
|
* Removes this physical page from
|
|
|
|
|
* all physical maps in which it resides.
|
|
|
|
|
* Reflects back modify bits to the pager.
|
|
|
|
|
*
|
|
|
|
|
* Notes:
|
|
|
|
|
* Original versions of this routine were very
|
|
|
|
|
* inefficient because they iteratively called
|
|
|
|
|
* pmap_remove (slow...)
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
pmap_remove_all(vm_page_t m)
|
|
|
|
|
{
|
|
|
|
|
pv_entry_t pv;
|
|
|
|
|
pmap_t pmap;
|
|
|
|
|
pt_entry_t *pte, tpte;
|
|
|
|
|
vm_page_t free;
|
|
|
|
|
|
2011-08-09 21:01:36 +00:00
|
|
|
|
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
|
2011-06-29 16:40:41 +00:00
|
|
|
|
("pmap_remove_all: page %p is not managed", m));
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
free = NULL;
|
2010-05-08 20:34:01 +00:00
|
|
|
|
vm_page_lock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
sched_pin();
|
|
|
|
|
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
|
|
|
|
|
pmap = PV_PMAP(pv);
|
|
|
|
|
PMAP_LOCK(pmap);
|
|
|
|
|
pmap->pm_stats.resident_count--;
|
|
|
|
|
pte = pmap_pte_quick(pmap, pv->pv_va);
|
|
|
|
|
|
|
|
|
|
tpte = *pte;
|
|
|
|
|
PT_SET_VA_MA(pte, 0, TRUE);
|
|
|
|
|
if (tpte & PG_W)
|
|
|
|
|
pmap->pm_stats.wired_count--;
|
|
|
|
|
if (tpte & PG_A)
|
|
|
|
|
vm_page_flag_set(m, PG_REFERENCED);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Update the vm_page_t clean and reference bits.
|
|
|
|
|
*/
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
|
2008-08-12 19:48:18 +00:00
|
|
|
|
vm_page_dirty(m);
|
|
|
|
|
pmap_unuse_pt(pmap, pv->pv_va, &free);
|
|
|
|
|
pmap_invalidate_page(pmap, pv->pv_va);
|
|
|
|
|
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
|
|
|
|
|
free_pv_entry(pmap, pv);
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
}
|
|
|
|
|
vm_page_flag_clear(m, PG_WRITEABLE);
|
|
|
|
|
PT_UPDATES_FLUSH();
|
|
|
|
|
if (*PMAP1)
|
|
|
|
|
PT_SET_MA(PADDR1, 0);
|
|
|
|
|
sched_unpin();
|
2010-05-08 20:34:01 +00:00
|
|
|
|
vm_page_unlock_queues();
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
pmap_free_zero_pages(free);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Set the physical protection on the
|
|
|
|
|
* specified range of this map as requested.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
|
|
|
|
|
{
|
|
|
|
|
vm_offset_t pdnxt;
|
|
|
|
|
pd_entry_t ptpaddr;
|
|
|
|
|
pt_entry_t *pte;
|
|
|
|
|
int anychanged;
|
|
|
|
|
|
|
|
|
|
CTR4(KTR_PMAP, "pmap_protect: pmap=%p sva=0x%x eva=0x%x prot=0x%x",
|
|
|
|
|
pmap, sva, eva, prot);
|
|
|
|
|
|
|
|
|
|
if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
|
|
|
|
|
pmap_remove(pmap, sva, eva);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifdef PAE
|
|
|
|
|
if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
|
|
|
|
|
(VM_PROT_WRITE|VM_PROT_EXECUTE))
|
|
|
|
|
return;
|
|
|
|
|
#else
|
|
|
|
|
if (prot & VM_PROT_WRITE)
|
|
|
|
|
return;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
anychanged = 0;
|
|
|
|
|
|
|
|
|
|
vm_page_lock_queues();
|
|
|
|
|
sched_pin();
|
|
|
|
|
PMAP_LOCK(pmap);
|
|
|
|
|
for (; sva < eva; sva = pdnxt) {
|
|
|
|
|
pt_entry_t obits, pbits;
|
|
|
|
|
unsigned pdirindex;
|
|
|
|
|
|
|
|
|
|
pdnxt = (sva + NBPDR) & ~PDRMASK;
|
|
|
|
|
|
|
|
|
|
pdirindex = sva >> PDRSHIFT;
|
|
|
|
|
ptpaddr = pmap->pm_pdir[pdirindex];
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Weed out invalid mappings. Note: we assume that the page
|
|
|
|
|
* directory table is always allocated, and in kernel virtual.
|
|
|
|
|
*/
|
|
|
|
|
if (ptpaddr == 0)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Check for large page.
|
|
|
|
|
*/
|
|
|
|
|
if ((ptpaddr & PG_PS) != 0) {
|
|
|
|
|
if ((prot & VM_PROT_WRITE) == 0)
|
|
|
|
|
pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
|
|
|
|
|
#ifdef PAE
|
|
|
|
|
if ((prot & VM_PROT_EXECUTE) == 0)
|
|
|
|
|
pmap->pm_pdir[pdirindex] |= pg_nx;
|
|
|
|
|
#endif
|
|
|
|
|
anychanged = 1;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (pdnxt > eva)
|
|
|
|
|
pdnxt = eva;
|
|
|
|
|
|
|
|
|
|
for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
|
|
|
|
|
sva += PAGE_SIZE) {
|
|
|
|
|
vm_page_t m;
|
|
|
|
|
|
|
|
|
|
retry:
|
|
|
|
|
/*
|
|
|
|
|
* Regardless of whether a pte is 32 or 64 bits in
|
|
|
|
|
* size, PG_RW, PG_A, and PG_M are among the least
|
|
|
|
|
* significant 32 bits.
|
|
|
|
|
*/
|
|
|
|
|
obits = pbits = *pte;
|
|
|
|
|
if ((pbits & PG_V) == 0)
|
|
|
|
|
continue;
|
2010-04-27 05:35:35 +00:00
|
|
|
|
|
|
|
|
|
if ((prot & VM_PROT_WRITE) == 0) {
|
|
|
|
|
if ((pbits & (PG_MANAGED | PG_M | PG_RW)) ==
|
|
|
|
|
(PG_MANAGED | PG_M | PG_RW)) {
|
|
|
|
|
m = PHYS_TO_VM_PAGE(xpmap_mtop(pbits) &
|
|
|
|
|
PG_FRAME);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
vm_page_dirty(m);
|
|
|
|
|
}
|
|
|
|
|
pbits &= ~(PG_RW | PG_M);
|
2010-04-27 05:35:35 +00:00
|
|
|
|
}
|
2008-08-12 19:48:18 +00:00
|
|
|
|
#ifdef PAE
|
|
|
|
|
if ((prot & VM_PROT_EXECUTE) == 0)
|
|
|
|
|
pbits |= pg_nx;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
if (pbits != obits) {
|
|
|
|
|
obits = *pte;
|
|
|
|
|
PT_SET_VA_MA(pte, pbits, TRUE);
|
|
|
|
|
if (*pte != pbits)
|
|
|
|
|
goto retry;
|
|
|
|
|
if (obits & PG_G)
|
|
|
|
|
pmap_invalidate_page(pmap, sva);
|
|
|
|
|
else
|
|
|
|
|
anychanged = 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
PT_UPDATES_FLUSH();
|
|
|
|
|
if (*PMAP1)
|
|
|
|
|
PT_SET_VA_MA(PMAP1, 0, TRUE);
|
|
|
|
|
if (anychanged)
|
|
|
|
|
pmap_invalidate_all(pmap);
|
|
|
|
|
sched_unpin();
|
|
|
|
|
vm_page_unlock_queues();
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Insert the given physical page (p) at
|
|
|
|
|
* the specified virtual address (v) in the
|
|
|
|
|
* target physical map with the protection requested.
|
|
|
|
|
*
|
|
|
|
|
* If specified, the page will be wired down, meaning
|
|
|
|
|
* that the related pte can not be reclaimed.
|
|
|
|
|
*
|
|
|
|
|
* NB: This is the only routine which MAY NOT lazy-evaluate
|
|
|
|
|
* or lose information. That is, this routine must actually
|
|
|
|
|
* insert this page into the given map NOW.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
|
|
|
|
|
vm_prot_t prot, boolean_t wired)
|
|
|
|
|
{
|
|
|
|
|
pd_entry_t *pde;
|
|
|
|
|
pt_entry_t *pte;
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
pt_entry_t newpte, origpte;
|
|
|
|
|
pv_entry_t pv;
|
|
|
|
|
vm_paddr_t opa, pa;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
vm_page_t mpte, om;
|
|
|
|
|
boolean_t invlva;
|
|
|
|
|
|
|
|
|
|
CTR6(KTR_PMAP, "pmap_enter: pmap=%08p va=0x%08x access=0x%x ma=0x%08x prot=0x%x wired=%d",
|
2010-11-20 20:04:29 +00:00
|
|
|
|
pmap, va, access, VM_PAGE_TO_MACH(m), prot, wired);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
va = trunc_page(va);
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
|
|
|
|
|
KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
|
2010-05-16 23:45:10 +00:00
|
|
|
|
("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)",
|
|
|
|
|
va));
|
2011-08-09 21:01:36 +00:00
|
|
|
|
KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0,
|
2010-05-16 23:45:10 +00:00
|
|
|
|
("pmap_enter: page %p is not busy", m));
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
|
|
|
|
mpte = NULL;
|
|
|
|
|
|
|
|
|
|
vm_page_lock_queues();
|
|
|
|
|
PMAP_LOCK(pmap);
|
|
|
|
|
sched_pin();
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* In the case that a page table page is not
|
|
|
|
|
* resident, we are creating it here.
|
|
|
|
|
*/
|
|
|
|
|
if (va < VM_MAXUSER_ADDRESS) {
|
|
|
|
|
mpte = pmap_allocpte(pmap, va, M_WAITOK);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pde = pmap_pde(pmap, va);
|
|
|
|
|
if ((*pde & PG_PS) != 0)
|
|
|
|
|
panic("pmap_enter: attempted pmap_enter on 4MB page");
|
|
|
|
|
pte = pmap_pte_quick(pmap, va);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Page Directory table entry not valid, we need a new PT page
|
|
|
|
|
*/
|
|
|
|
|
if (pte == NULL) {
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x",
|
2008-08-12 19:48:18 +00:00
|
|
|
|
(uintmax_t)pmap->pm_pdir[va >> PDRSHIFT], va);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pa = VM_PAGE_TO_PHYS(m);
|
|
|
|
|
om = NULL;
|
|
|
|
|
opa = origpte = 0;
|
|
|
|
|
|
|
|
|
|
#if 0
|
|
|
|
|
KASSERT((*pte & PG_V) || (*pte == 0), ("address set but not valid pte=%p *pte=0x%016jx",
|
|
|
|
|
pte, *pte));
|
|
|
|
|
#endif
|
|
|
|
|
origpte = *pte;
|
|
|
|
|
if (origpte)
|
|
|
|
|
origpte = xpmap_mtop(origpte);
|
|
|
|
|
opa = origpte & PG_FRAME;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Mapping has not changed, must be protection or wiring change.
|
|
|
|
|
*/
|
|
|
|
|
if (origpte && (opa == pa)) {
|
|
|
|
|
/*
|
|
|
|
|
* Wiring change, just update stats. We don't worry about
|
|
|
|
|
* wiring PT pages as they remain resident as long as there
|
|
|
|
|
* are valid mappings in them. Hence, if a user page is wired,
|
|
|
|
|
* the PT page will be also.
|
|
|
|
|
*/
|
|
|
|
|
if (wired && ((origpte & PG_W) == 0))
|
|
|
|
|
pmap->pm_stats.wired_count++;
|
|
|
|
|
else if (!wired && (origpte & PG_W))
|
|
|
|
|
pmap->pm_stats.wired_count--;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Remove extra pte reference
|
|
|
|
|
*/
|
|
|
|
|
if (mpte)
|
|
|
|
|
mpte->wire_count--;
|
|
|
|
|
|
|
|
|
|
if (origpte & PG_MANAGED) {
|
|
|
|
|
om = m;
|
|
|
|
|
pa |= PG_MANAGED;
|
|
|
|
|
}
|
|
|
|
|
goto validate;
|
|
|
|
|
}
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
|
|
|
|
|
pv = NULL;
|
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
/*
|
|
|
|
|
* Mapping has changed, invalidate old range and fall through to
|
|
|
|
|
* handle validating new mapping.
|
|
|
|
|
*/
|
|
|
|
|
if (opa) {
|
|
|
|
|
if (origpte & PG_W)
|
|
|
|
|
pmap->pm_stats.wired_count--;
|
|
|
|
|
if (origpte & PG_MANAGED) {
|
|
|
|
|
om = PHYS_TO_VM_PAGE(opa);
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
pv = pmap_pvh_remove(&om->md, pmap, va);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
} else if (va < VM_MAXUSER_ADDRESS)
|
|
|
|
|
printf("va=0x%x is unmanaged :-( \n", va);
|
|
|
|
|
|
|
|
|
|
if (mpte != NULL) {
|
|
|
|
|
mpte->wire_count--;
|
|
|
|
|
KASSERT(mpte->wire_count > 0,
|
|
|
|
|
("pmap_enter: missing reference to page table page,"
|
|
|
|
|
" va: 0x%x", va));
|
|
|
|
|
}
|
|
|
|
|
} else
|
|
|
|
|
pmap->pm_stats.resident_count++;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Enter on the PV list if part of our managed memory.
|
|
|
|
|
*/
|
2011-08-09 21:01:36 +00:00
|
|
|
|
if ((m->oflags & VPO_UNMANAGED) == 0) {
|
2008-08-12 19:48:18 +00:00
|
|
|
|
KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
|
|
|
|
|
("pmap_enter: managed mapping within the clean submap"));
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
if (pv == NULL)
|
|
|
|
|
pv = get_pv_entry(pmap, FALSE);
|
|
|
|
|
pv->pv_va = va;
|
|
|
|
|
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
pa |= PG_MANAGED;
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
} else if (pv != NULL)
|
|
|
|
|
free_pv_entry(pmap, pv);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Increment counters
|
|
|
|
|
*/
|
|
|
|
|
if (wired)
|
|
|
|
|
pmap->pm_stats.wired_count++;
|
|
|
|
|
|
|
|
|
|
validate:
|
|
|
|
|
/*
|
|
|
|
|
* Now validate mapping with desired protection/wiring.
|
|
|
|
|
*/
|
|
|
|
|
newpte = (pt_entry_t)(pa | PG_V);
|
|
|
|
|
if ((prot & VM_PROT_WRITE) != 0) {
|
|
|
|
|
newpte |= PG_RW;
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
if ((newpte & PG_MANAGED) != 0)
|
|
|
|
|
vm_page_flag_set(m, PG_WRITEABLE);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
#ifdef PAE
|
|
|
|
|
if ((prot & VM_PROT_EXECUTE) == 0)
|
|
|
|
|
newpte |= pg_nx;
|
|
|
|
|
#endif
|
|
|
|
|
if (wired)
|
|
|
|
|
newpte |= PG_W;
|
|
|
|
|
if (va < VM_MAXUSER_ADDRESS)
|
|
|
|
|
newpte |= PG_U;
|
|
|
|
|
if (pmap == kernel_pmap)
|
|
|
|
|
newpte |= pgeflag;
|
|
|
|
|
|
|
|
|
|
critical_enter();
|
|
|
|
|
/*
|
|
|
|
|
* if the mapping or permission bits are different, we need
|
|
|
|
|
* to update the pte.
|
|
|
|
|
*/
|
|
|
|
|
if ((origpte & ~(PG_M|PG_A)) != newpte) {
|
|
|
|
|
if (origpte) {
|
|
|
|
|
invlva = FALSE;
|
|
|
|
|
origpte = *pte;
|
|
|
|
|
PT_SET_VA(pte, newpte | PG_A, FALSE);
|
|
|
|
|
if (origpte & PG_A) {
|
|
|
|
|
if (origpte & PG_MANAGED)
|
|
|
|
|
vm_page_flag_set(om, PG_REFERENCED);
|
|
|
|
|
if (opa != VM_PAGE_TO_PHYS(m))
|
|
|
|
|
invlva = TRUE;
|
|
|
|
|
#ifdef PAE
|
|
|
|
|
if ((origpte & PG_NX) == 0 &&
|
|
|
|
|
(newpte & PG_NX) != 0)
|
|
|
|
|
invlva = TRUE;
|
|
|
|
|
#endif
|
|
|
|
|
}
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
|
2008-08-12 19:48:18 +00:00
|
|
|
|
if ((origpte & PG_MANAGED) != 0)
|
|
|
|
|
vm_page_dirty(om);
|
|
|
|
|
if ((prot & VM_PROT_WRITE) == 0)
|
|
|
|
|
invlva = TRUE;
|
|
|
|
|
}
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
if ((origpte & PG_MANAGED) != 0 &&
|
|
|
|
|
TAILQ_EMPTY(&om->md.pv_list))
|
|
|
|
|
vm_page_flag_clear(om, PG_WRITEABLE);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
if (invlva)
|
|
|
|
|
pmap_invalidate_page(pmap, va);
|
|
|
|
|
} else{
|
|
|
|
|
PT_SET_VA(pte, newpte | PG_A, FALSE);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
PT_UPDATES_FLUSH();
|
|
|
|
|
critical_exit();
|
|
|
|
|
if (*PMAP1)
|
|
|
|
|
PT_SET_VA_MA(PMAP1, 0, TRUE);
|
|
|
|
|
sched_unpin();
|
|
|
|
|
vm_page_unlock_queues();
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Maps a sequence of resident pages belonging to the same object.
|
|
|
|
|
* The sequence begins with the given page m_start. This page is
|
|
|
|
|
* mapped at the given virtual address start. Each subsequent page is
|
|
|
|
|
* mapped at a virtual address that is offset from start by the same
|
|
|
|
|
* amount as the page is offset from m_start within the object. The
|
|
|
|
|
* last page in the sequence is the page with the largest offset from
|
|
|
|
|
* m_start that can be mapped at a virtual address less than the given
|
|
|
|
|
* virtual address end. Not every virtual page between start and end
|
|
|
|
|
* is mapped; only those for which a resident page exists with the
|
|
|
|
|
* corresponding offset from m_start are mapped.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
|
|
|
|
|
vm_page_t m_start, vm_prot_t prot)
|
|
|
|
|
{
|
|
|
|
|
vm_page_t m, mpte;
|
|
|
|
|
vm_pindex_t diff, psize;
|
|
|
|
|
multicall_entry_t mcl[16];
|
|
|
|
|
multicall_entry_t *mclp = mcl;
|
|
|
|
|
int error, count = 0;
|
|
|
|
|
|
|
|
|
|
VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
|
|
|
|
|
psize = atop(end - start);
|
|
|
|
|
|
|
|
|
|
mpte = NULL;
|
|
|
|
|
m = m_start;
|
2010-05-26 18:00:44 +00:00
|
|
|
|
vm_page_lock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
PMAP_LOCK(pmap);
|
|
|
|
|
while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
|
|
|
|
|
mpte = pmap_enter_quick_locked(&mclp, &count, pmap, start + ptoa(diff), m,
|
|
|
|
|
prot, mpte);
|
|
|
|
|
m = TAILQ_NEXT(m, listq);
|
|
|
|
|
if (count == 16) {
|
|
|
|
|
error = HYPERVISOR_multicall(mcl, count);
|
|
|
|
|
KASSERT(error == 0, ("bad multicall %d", error));
|
|
|
|
|
mclp = mcl;
|
|
|
|
|
count = 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (count) {
|
|
|
|
|
error = HYPERVISOR_multicall(mcl, count);
|
|
|
|
|
KASSERT(error == 0, ("bad multicall %d", error));
|
|
|
|
|
}
|
2010-05-26 18:00:44 +00:00
|
|
|
|
vm_page_unlock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* this code makes some *MAJOR* assumptions:
|
|
|
|
|
* 1. Current pmap & pmap exists.
|
|
|
|
|
* 2. Not wired.
|
|
|
|
|
* 3. Read access.
|
|
|
|
|
* 4. No page table pages.
|
|
|
|
|
* but is *MUCH* faster than pmap_enter...
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
|
|
|
|
|
{
|
|
|
|
|
multicall_entry_t mcl, *mclp;
|
|
|
|
|
int count = 0;
|
|
|
|
|
mclp = &mcl;
|
|
|
|
|
|
|
|
|
|
CTR4(KTR_PMAP, "pmap_enter_quick: pmap=%p va=0x%x m=%p prot=0x%x",
|
|
|
|
|
pmap, va, m, prot);
|
|
|
|
|
|
2010-05-08 20:34:01 +00:00
|
|
|
|
vm_page_lock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
PMAP_LOCK(pmap);
|
2010-05-08 20:34:01 +00:00
|
|
|
|
(void)pmap_enter_quick_locked(&mclp, &count, pmap, va, m, prot, NULL);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
if (count)
|
|
|
|
|
HYPERVISOR_multicall(&mcl, count);
|
2010-05-08 20:34:01 +00:00
|
|
|
|
vm_page_unlock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
}
|
|
|
|
|
|
2008-08-15 04:00:44 +00:00
|
|
|
|
#ifdef notyet
/*
 * pmap_enter_quick_range:
 *
 *	Batched form of pmap_enter_quick().  For each of the "count"
 *	(address, page, protection) triples whose address passes
 *	pmap_is_prefaultable_locked(), queue a mapping through
 *	pmap_enter_quick_locked().  Queued updates are submitted to the
 *	hypervisor whenever the 16-entry multicall buffer fills, and once
 *	more at the end for any remainder.
 *
 *	pmap_enter_quick_locked() asserts that both the page queues lock and
 *	the pmap lock are owned, so both are taken here, in the same order
 *	that pmap_enter_quick() uses.
 */
void
pmap_enter_quick_range(pmap_t pmap, vm_offset_t *addrs, vm_page_t *pages, vm_prot_t *prots, int count)
{
	int i, error, index = 0;
	multicall_entry_t mcl[16];
	multicall_entry_t *mclp = mcl;

	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	for (i = 0; i < count; i++, addrs++, pages++, prots++) {
		if (!pmap_is_prefaultable_locked(pmap, *addrs))
			continue;

		(void) pmap_enter_quick_locked(&mclp, &index, pmap, *addrs, *pages, *prots, NULL);
		if (index == 16) {
			/* Buffer is full: flush it and start refilling. */
			error = HYPERVISOR_multicall(mcl, index);
			mclp = mcl;
			index = 0;
			KASSERT(error == 0, ("bad multicall %d", error));
		}
	}
	if (index) {
		/* Flush the partially filled buffer. */
		error = HYPERVISOR_multicall(mcl, index);
		KASSERT(error == 0, ("bad multicall %d", error));
	}
	vm_page_unlock_queues();
	PMAP_UNLOCK(pmap);
}
#endif
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
|
|
|
|
static vm_page_t
|
|
|
|
|
pmap_enter_quick_locked(multicall_entry_t **mclpp, int *count, pmap_t pmap, vm_offset_t va, vm_page_t m,
|
|
|
|
|
vm_prot_t prot, vm_page_t mpte)
|
|
|
|
|
{
|
|
|
|
|
pt_entry_t *pte;
|
|
|
|
|
vm_paddr_t pa;
|
|
|
|
|
vm_page_t free;
|
|
|
|
|
multicall_entry_t *mcl = *mclpp;
|
|
|
|
|
|
|
|
|
|
KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
|
2011-08-09 21:01:36 +00:00
|
|
|
|
(m->oflags & VPO_UNMANAGED) != 0,
|
2008-08-12 19:48:18 +00:00
|
|
|
|
("pmap_enter_quick_locked: managed mapping within the clean submap"));
|
|
|
|
|
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
|
|
|
|
|
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* In the case that a page table page is not
|
|
|
|
|
* resident, we are creating it here.
|
|
|
|
|
*/
|
|
|
|
|
if (va < VM_MAXUSER_ADDRESS) {
|
|
|
|
|
unsigned ptepindex;
|
|
|
|
|
pd_entry_t ptema;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Calculate pagetable page index
|
|
|
|
|
*/
|
|
|
|
|
ptepindex = va >> PDRSHIFT;
|
|
|
|
|
if (mpte && (mpte->pindex == ptepindex)) {
|
|
|
|
|
mpte->wire_count++;
|
|
|
|
|
} else {
|
|
|
|
|
/*
|
|
|
|
|
* Get the page directory entry
|
|
|
|
|
*/
|
|
|
|
|
ptema = pmap->pm_pdir[ptepindex];
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If the page table page is mapped, we just increment
|
|
|
|
|
* the hold count, and activate it.
|
|
|
|
|
*/
|
|
|
|
|
if (ptema & PG_V) {
|
|
|
|
|
if (ptema & PG_PS)
|
|
|
|
|
panic("pmap_enter_quick: unexpected mapping into 4MB page");
|
|
|
|
|
mpte = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME);
|
|
|
|
|
mpte->wire_count++;
|
|
|
|
|
} else {
|
|
|
|
|
mpte = _pmap_allocpte(pmap, ptepindex,
|
|
|
|
|
M_NOWAIT);
|
|
|
|
|
if (mpte == NULL)
|
|
|
|
|
return (mpte);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
mpte = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* This call to vtopte makes the assumption that we are
|
|
|
|
|
* entering the page into the current pmap. In order to support
|
|
|
|
|
* quick entry into any pmap, one would likely use pmap_pte_quick.
|
|
|
|
|
* But that isn't as quick as vtopte.
|
|
|
|
|
*/
|
|
|
|
|
KASSERT(pmap_is_current(pmap), ("entering pages in non-current pmap"));
|
|
|
|
|
pte = vtopte(va);
|
|
|
|
|
if (*pte & PG_V) {
|
|
|
|
|
if (mpte != NULL) {
|
|
|
|
|
mpte->wire_count--;
|
|
|
|
|
mpte = NULL;
|
|
|
|
|
}
|
|
|
|
|
return (mpte);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Enter on the PV list if part of our managed memory.
|
|
|
|
|
*/
|
2011-08-09 21:01:36 +00:00
|
|
|
|
if ((m->oflags & VPO_UNMANAGED) == 0 &&
|
2008-08-12 19:48:18 +00:00
|
|
|
|
!pmap_try_insert_pv_entry(pmap, va, m)) {
|
|
|
|
|
if (mpte != NULL) {
|
|
|
|
|
free = NULL;
|
|
|
|
|
if (pmap_unwire_pte_hold(pmap, mpte, &free)) {
|
|
|
|
|
pmap_invalidate_page(pmap, va);
|
|
|
|
|
pmap_free_zero_pages(free);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
mpte = NULL;
|
|
|
|
|
}
|
|
|
|
|
return (mpte);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Increment counters
|
|
|
|
|
*/
|
|
|
|
|
pmap->pm_stats.resident_count++;
|
|
|
|
|
|
|
|
|
|
pa = VM_PAGE_TO_PHYS(m);
|
|
|
|
|
#ifdef PAE
|
|
|
|
|
if ((prot & VM_PROT_EXECUTE) == 0)
|
|
|
|
|
pa |= pg_nx;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if 0
|
|
|
|
|
/*
|
|
|
|
|
* Now validate mapping with RO protection
|
|
|
|
|
*/
|
2011-08-09 21:01:36 +00:00
|
|
|
|
if ((m->oflags & VPO_UNMANAGED) != 0)
|
2008-08-12 19:48:18 +00:00
|
|
|
|
pte_store(pte, pa | PG_V | PG_U);
|
|
|
|
|
else
|
|
|
|
|
pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
|
|
|
|
|
#else
|
|
|
|
|
/*
|
|
|
|
|
* Now validate mapping with RO protection
|
|
|
|
|
*/
|
2011-08-09 21:01:36 +00:00
|
|
|
|
if ((m->oflags & VPO_UNMANAGED) != 0)
|
2008-08-12 19:48:18 +00:00
|
|
|
|
pa = xpmap_ptom(pa | PG_V | PG_U);
|
|
|
|
|
else
|
|
|
|
|
pa = xpmap_ptom(pa | PG_V | PG_U | PG_MANAGED);
|
|
|
|
|
|
|
|
|
|
mcl->op = __HYPERVISOR_update_va_mapping;
|
|
|
|
|
mcl->args[0] = va;
|
|
|
|
|
mcl->args[1] = (uint32_t)(pa & 0xffffffff);
|
|
|
|
|
mcl->args[2] = (uint32_t)(pa >> 32);
|
|
|
|
|
mcl->args[3] = 0;
|
|
|
|
|
*mclpp = mcl + 1;
|
|
|
|
|
*count = *count + 1;
|
|
|
|
|
#endif
|
|
|
|
|
return mpte;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Make a temporary mapping for a physical address. This is only intended
|
|
|
|
|
* to be used for panic dumps.
|
|
|
|
|
*/
|
|
|
|
|
void *
|
|
|
|
|
pmap_kenter_temporary(vm_paddr_t pa, int i)
|
|
|
|
|
{
|
|
|
|
|
vm_offset_t va;
|
2009-12-10 03:09:35 +00:00
|
|
|
|
vm_paddr_t ma = xpmap_ptom(pa);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
|
|
|
|
va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
|
2009-12-10 03:09:35 +00:00
|
|
|
|
PT_SET_MA(va, (ma & ~PAGE_MASK) | PG_V | pgeflag);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
invlpg(va);
|
|
|
|
|
return ((void *)crashdumpmap);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* This code maps large physical mmap regions into the
|
|
|
|
|
* processor address space. Note that some shortcuts
|
|
|
|
|
* are taken, but the code works.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
|
|
|
|
|
vm_object_t object, vm_pindex_t pindex,
|
|
|
|
|
vm_size_t size)
|
|
|
|
|
{
|
2010-04-30 03:26:12 +00:00
|
|
|
|
pd_entry_t *pde;
|
|
|
|
|
vm_paddr_t pa, ptepa;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
vm_page_t p;
|
2010-04-30 03:26:12 +00:00
|
|
|
|
int pat_mode;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
|
|
|
|
VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
|
2009-07-24 13:50:29 +00:00
|
|
|
|
KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
|
2008-08-12 19:48:18 +00:00
|
|
|
|
("pmap_object_init_pt: non-device object"));
|
|
|
|
|
if (pseflag &&
|
2010-04-30 03:26:12 +00:00
|
|
|
|
(addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) {
|
|
|
|
|
if (!vm_object_populate(object, pindex, pindex + atop(size)))
|
|
|
|
|
return;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
p = vm_page_lookup(object, pindex);
|
2010-04-30 03:26:12 +00:00
|
|
|
|
KASSERT(p->valid == VM_PAGE_BITS_ALL,
|
|
|
|
|
("pmap_object_init_pt: invalid page %p", p));
|
|
|
|
|
pat_mode = p->md.pat_mode;
|
|
|
|
|
/*
|
|
|
|
|
* Abort the mapping if the first page is not physically
|
|
|
|
|
* aligned to a 2/4MB page boundary.
|
|
|
|
|
*/
|
2008-08-12 19:48:18 +00:00
|
|
|
|
ptepa = VM_PAGE_TO_PHYS(p);
|
|
|
|
|
if (ptepa & (NBPDR - 1))
|
|
|
|
|
return;
|
2010-04-30 03:26:12 +00:00
|
|
|
|
/*
|
|
|
|
|
* Skip the first page. Abort the mapping if the rest of
|
|
|
|
|
* the pages are not physically contiguous or have differing
|
|
|
|
|
* memory attributes.
|
|
|
|
|
*/
|
|
|
|
|
p = TAILQ_NEXT(p, listq);
|
|
|
|
|
for (pa = ptepa + PAGE_SIZE; pa < ptepa + size;
|
|
|
|
|
pa += PAGE_SIZE) {
|
|
|
|
|
KASSERT(p->valid == VM_PAGE_BITS_ALL,
|
|
|
|
|
("pmap_object_init_pt: invalid page %p", p));
|
|
|
|
|
if (pa != VM_PAGE_TO_PHYS(p) ||
|
|
|
|
|
pat_mode != p->md.pat_mode)
|
|
|
|
|
return;
|
|
|
|
|
p = TAILQ_NEXT(p, listq);
|
|
|
|
|
}
|
|
|
|
|
/* Map using 2/4MB pages. */
|
2008-08-12 19:48:18 +00:00
|
|
|
|
PMAP_LOCK(pmap);
|
2010-04-30 03:26:12 +00:00
|
|
|
|
for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa +
|
|
|
|
|
size; pa += NBPDR) {
|
|
|
|
|
pde = pmap_pde(pmap, addr);
|
|
|
|
|
if (*pde == 0) {
|
|
|
|
|
pde_store(pde, pa | PG_PS | PG_M | PG_A |
|
|
|
|
|
PG_U | PG_RW | PG_V);
|
|
|
|
|
pmap->pm_stats.resident_count += NBPDR /
|
|
|
|
|
PAGE_SIZE;
|
|
|
|
|
pmap_pde_mappings++;
|
|
|
|
|
}
|
|
|
|
|
/* Else continue on if the PDE is already valid. */
|
|
|
|
|
addr += NBPDR;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Routine: pmap_change_wiring
|
|
|
|
|
* Function: Change the wiring attribute for a map/virtual-address
|
|
|
|
|
* pair.
|
|
|
|
|
* In/out conditions:
|
|
|
|
|
* The mapping must already exist in the pmap.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
|
|
|
|
|
{
|
|
|
|
|
pt_entry_t *pte;
|
|
|
|
|
|
|
|
|
|
vm_page_lock_queues();
|
|
|
|
|
PMAP_LOCK(pmap);
|
|
|
|
|
pte = pmap_pte(pmap, va);
|
|
|
|
|
|
|
|
|
|
if (wired && !pmap_pte_w(pte)) {
|
|
|
|
|
PT_SET_VA_MA((pte), *(pte) | PG_W, TRUE);
|
|
|
|
|
pmap->pm_stats.wired_count++;
|
|
|
|
|
} else if (!wired && pmap_pte_w(pte)) {
|
|
|
|
|
PT_SET_VA_MA((pte), *(pte) & ~PG_W, TRUE);
|
|
|
|
|
pmap->pm_stats.wired_count--;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Wiring is not a hardware characteristic so there is no need to
|
|
|
|
|
* invalidate TLB.
|
|
|
|
|
*/
|
|
|
|
|
pmap_pte_release(pte);
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
vm_page_unlock_queues();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Copy the range specified by src_addr/len
|
|
|
|
|
* from the source map to the range dst_addr/len
|
|
|
|
|
* in the destination map.
|
|
|
|
|
*
|
|
|
|
|
* This routine is only advisory and need not do anything.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
|
|
|
|
|
vm_offset_t src_addr)
|
|
|
|
|
{
|
|
|
|
|
vm_page_t free;
|
|
|
|
|
vm_offset_t addr;
|
|
|
|
|
vm_offset_t end_addr = src_addr + len;
|
|
|
|
|
vm_offset_t pdnxt;
|
|
|
|
|
|
|
|
|
|
if (dst_addr != src_addr)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
if (!pmap_is_current(src_pmap)) {
|
|
|
|
|
CTR2(KTR_PMAP,
|
|
|
|
|
"pmap_copy, skipping: pdir[PTDPTDI]=0x%jx PTDpde[0]=0x%jx",
|
|
|
|
|
(src_pmap->pm_pdir[PTDPTDI] & PG_FRAME), (PTDpde[0] & PG_FRAME));
|
|
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
CTR5(KTR_PMAP, "pmap_copy: dst_pmap=%p src_pmap=%p dst_addr=0x%x len=%d src_addr=0x%x",
|
|
|
|
|
dst_pmap, src_pmap, dst_addr, len, src_addr);
|
|
|
|
|
|
2011-01-04 15:55:15 +00:00
|
|
|
|
#ifdef HAMFISTED_LOCKING
|
|
|
|
|
mtx_lock(&createdelete_lock);
|
|
|
|
|
#endif
|
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
vm_page_lock_queues();
|
|
|
|
|
if (dst_pmap < src_pmap) {
|
|
|
|
|
PMAP_LOCK(dst_pmap);
|
|
|
|
|
PMAP_LOCK(src_pmap);
|
|
|
|
|
} else {
|
|
|
|
|
PMAP_LOCK(src_pmap);
|
|
|
|
|
PMAP_LOCK(dst_pmap);
|
|
|
|
|
}
|
|
|
|
|
sched_pin();
|
|
|
|
|
for (addr = src_addr; addr < end_addr; addr = pdnxt) {
|
|
|
|
|
pt_entry_t *src_pte, *dst_pte;
|
|
|
|
|
vm_page_t dstmpte, srcmpte;
|
|
|
|
|
pd_entry_t srcptepaddr;
|
|
|
|
|
unsigned ptepindex;
|
|
|
|
|
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
KASSERT(addr < UPT_MIN_ADDRESS,
|
|
|
|
|
("pmap_copy: invalid to pmap_copy page tables"));
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
|
|
|
|
pdnxt = (addr + NBPDR) & ~PDRMASK;
|
|
|
|
|
ptepindex = addr >> PDRSHIFT;
|
|
|
|
|
|
|
|
|
|
srcptepaddr = PT_GET(&src_pmap->pm_pdir[ptepindex]);
|
|
|
|
|
if (srcptepaddr == 0)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (srcptepaddr & PG_PS) {
|
|
|
|
|
if (dst_pmap->pm_pdir[ptepindex] == 0) {
|
|
|
|
|
PD_SET_VA(dst_pmap, ptepindex, srcptepaddr & ~PG_W, TRUE);
|
|
|
|
|
dst_pmap->pm_stats.resident_count +=
|
|
|
|
|
NBPDR / PAGE_SIZE;
|
|
|
|
|
}
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME);
|
Merge various changes from i386/i386/pmap.c:
The remaining, unmerged portions of r175404
Retire PMAP_DIAGNOSTIC. Any useful diagnostics that were conditionally
compiled under PMAP_DIAGNOSTIC are now KASSERT()s. (Note: The kernel
option DIAGNOSTIC still disables inlining of certain pmap functions.)
Eliminate dead code from pmap_enter(). This code implemented an assertion.
On i386, an equivalent check is already implemented. However, on amd64,
a small change is required to implement an equivalent check.
Eliminate \n from a nearby panic string.
Use KASSERT() to reimplement pmap_copy()'s two assertions.
Merge portions of r177659
To date, we have assumed that the TLB will only set the PG_M bit in a
PTE if that PTE has the PG_RW bit set. However, this assumption does
not hold on recent processors from Intel. For example, consider a PTE
that has the PG_RW bit set but the PG_M bit clear. Suppose this PTE
is cached in the TLB and later the PG_RW bit is cleared in the PTE,
but the corresponding TLB entry is not (yet) invalidated.
Historically, upon a write access using this (stale) TLB entry, the
TLB would observe that the PG_RW bit had been cleared and initiate a
page fault, aborting the setting of the PG_M bit in the PTE. Now,
however, P4- and Core2-family processors will set the PG_M bit before
observing that the PG_RW bit is clear and initiating a page fault. In
other words, the write does not occur but the PG_M bit is still set.
The real impact of this difference is not that great. Specifically,
we should no longer assert that any PTE with the PG_M bit set must
also have the PG_RW bit set, and we should ignore the state of the
PG_M bit unless the PG_RW bit is set.
r208609
Defer freeing any page table pages in pmap_remove_all() until after the
page queues lock is released. This may reduce the amount of time that the
page queues lock is held by pmap_remove_all().
r208645
When I pushed down the page queues lock into pmap_is_modified(), I created
an ordering dependence: A pmap operation that clears PG_WRITEABLE and calls
vm_page_dirty() must perform the call first. Otherwise, pmap_is_modified()
could return FALSE without acquiring the page queues lock because the page
is not (currently) writeable, and the caller to pmap_is_modified() might
believe that the page's dirty field is clear because it has not seen the
effect of the vm_page_dirty() call.
When I pushed down the page queues lock into pmap_is_modified(), I
overlooked one place where this ordering dependence is violated:
pmap_enter(). In a rare situation pmap_enter() can be called to replace a
dirty mapping to one page with a mapping to another page. (I say rare
because replacements generally occur as a result of a copy-on-write fault,
and so the old page is not dirty.) This change delays clearing PG_WRITEABLE
until after vm_page_dirty() has been called.
Fixing the ordering dependency also makes it easy to introduce a small
optimization: When pmap_enter() used to replace a mapping to one page with a
mapping to another page, it freed the pv entry for the first mapping and
later called the pv entry allocator for the new mapping. Now, pmap_enter()
attempts to recycle the old pv entry, saving two calls to the pv entry
allocator.
There is no point in setting PG_WRITEABLE on unmanaged pages, so don't.
Update a comment to reflect this.
Tidy up the variable declarations at the start of pmap_enter().
2010-05-30 04:44:32 +00:00
|
|
|
|
KASSERT(srcmpte->wire_count > 0,
|
|
|
|
|
("pmap_copy: source page table page is unused"));
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
|
|
|
|
if (pdnxt > end_addr)
|
|
|
|
|
pdnxt = end_addr;
|
|
|
|
|
|
|
|
|
|
src_pte = vtopte(addr);
|
|
|
|
|
while (addr < pdnxt) {
|
|
|
|
|
pt_entry_t ptetemp;
|
|
|
|
|
ptetemp = *src_pte;
|
|
|
|
|
/*
|
|
|
|
|
* we only virtual copy managed pages
|
|
|
|
|
*/
|
|
|
|
|
if ((ptetemp & PG_MANAGED) != 0) {
|
|
|
|
|
dstmpte = pmap_allocpte(dst_pmap, addr,
|
|
|
|
|
M_NOWAIT);
|
|
|
|
|
if (dstmpte == NULL)
|
|
|
|
|
break;
|
|
|
|
|
dst_pte = pmap_pte_quick(dst_pmap, addr);
|
|
|
|
|
if (*dst_pte == 0 &&
|
|
|
|
|
pmap_try_insert_pv_entry(dst_pmap, addr,
|
|
|
|
|
PHYS_TO_VM_PAGE(xpmap_mtop(ptetemp) & PG_FRAME))) {
|
|
|
|
|
/*
|
|
|
|
|
* Clear the wired, modified, and
|
|
|
|
|
* accessed (referenced) bits
|
|
|
|
|
* during the copy.
|
|
|
|
|
*/
|
|
|
|
|
KASSERT(ptetemp != 0, ("src_pte not set"));
|
|
|
|
|
PT_SET_VA_MA(dst_pte, ptetemp & ~(PG_W | PG_M | PG_A), TRUE /* XXX debug */);
|
|
|
|
|
KASSERT(*dst_pte == (ptetemp & ~(PG_W | PG_M | PG_A)),
|
|
|
|
|
("no pmap copy expected: 0x%jx saw: 0x%jx",
|
|
|
|
|
ptetemp & ~(PG_W | PG_M | PG_A), *dst_pte));
|
|
|
|
|
dst_pmap->pm_stats.resident_count++;
|
|
|
|
|
} else {
|
|
|
|
|
free = NULL;
|
|
|
|
|
if (pmap_unwire_pte_hold(dst_pmap,
|
|
|
|
|
dstmpte, &free)) {
|
|
|
|
|
pmap_invalidate_page(dst_pmap,
|
|
|
|
|
addr);
|
|
|
|
|
pmap_free_zero_pages(free);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (dstmpte->wire_count >= srcmpte->wire_count)
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
addr += PAGE_SIZE;
|
|
|
|
|
src_pte++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
PT_UPDATES_FLUSH();
|
|
|
|
|
sched_unpin();
|
|
|
|
|
vm_page_unlock_queues();
|
|
|
|
|
PMAP_UNLOCK(src_pmap);
|
|
|
|
|
PMAP_UNLOCK(dst_pmap);
|
2011-01-04 15:55:15 +00:00
|
|
|
|
|
|
|
|
|
#ifdef HAMFISTED_LOCKING
|
|
|
|
|
mtx_unlock(&createdelete_lock);
|
|
|
|
|
#endif
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
|
2009-08-31 23:30:39 +00:00
|
|
|
|
static __inline void
|
|
|
|
|
pagezero(void *page)
|
|
|
|
|
{
|
|
|
|
|
#if defined(I686_CPU)
|
|
|
|
|
if (cpu_class == CPUCLASS_686) {
|
|
|
|
|
#if defined(CPU_ENABLE_SSE)
|
|
|
|
|
if (cpu_feature & CPUID_SSE2)
|
|
|
|
|
sse2_pagezero(page);
|
|
|
|
|
else
|
|
|
|
|
#endif
|
|
|
|
|
i686_pagezero(page);
|
|
|
|
|
} else
|
|
|
|
|
#endif
|
|
|
|
|
bzero(page, PAGE_SIZE);
|
|
|
|
|
}
|
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
/*
|
|
|
|
|
* pmap_zero_page zeros the specified hardware page by mapping
|
|
|
|
|
* the page into KVM and using bzero to clear its contents.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_zero_page(vm_page_t m)
|
|
|
|
|
{
|
|
|
|
|
struct sysmaps *sysmaps;
|
|
|
|
|
|
|
|
|
|
sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
|
|
|
|
|
mtx_lock(&sysmaps->lock);
|
|
|
|
|
if (*sysmaps->CMAP2)
|
|
|
|
|
panic("pmap_zero_page: CMAP2 busy");
|
|
|
|
|
sched_pin();
|
2010-11-20 20:04:29 +00:00
|
|
|
|
PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
pagezero(sysmaps->CADDR2);
|
|
|
|
|
PT_SET_MA(sysmaps->CADDR2, 0);
|
|
|
|
|
sched_unpin();
|
|
|
|
|
mtx_unlock(&sysmaps->lock);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* pmap_zero_page_area zeros the specified hardware page by mapping
|
|
|
|
|
* the page into KVM and using bzero to clear its contents.
|
|
|
|
|
*
|
|
|
|
|
* off and size may not cover an area beyond a single hardware page.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_zero_page_area(vm_page_t m, int off, int size)
|
|
|
|
|
{
|
|
|
|
|
struct sysmaps *sysmaps;
|
|
|
|
|
|
|
|
|
|
sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
|
|
|
|
|
mtx_lock(&sysmaps->lock);
|
|
|
|
|
if (*sysmaps->CMAP2)
|
|
|
|
|
panic("pmap_zero_page: CMAP2 busy");
|
|
|
|
|
sched_pin();
|
2010-11-20 20:04:29 +00:00
|
|
|
|
PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
|
|
|
|
if (off == 0 && size == PAGE_SIZE)
|
|
|
|
|
pagezero(sysmaps->CADDR2);
|
|
|
|
|
else
|
|
|
|
|
bzero((char *)sysmaps->CADDR2 + off, size);
|
|
|
|
|
PT_SET_MA(sysmaps->CADDR2, 0);
|
|
|
|
|
sched_unpin();
|
|
|
|
|
mtx_unlock(&sysmaps->lock);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* pmap_zero_page_idle zeros the specified hardware page by mapping
|
|
|
|
|
* the page into KVM and using bzero to clear its contents. This
|
|
|
|
|
* is intended to be called from the vm_pagezero process only and
|
|
|
|
|
* outside of Giant.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_zero_page_idle(vm_page_t m)
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
if (*CMAP3)
|
|
|
|
|
panic("pmap_zero_page: CMAP3 busy");
|
|
|
|
|
sched_pin();
|
2010-11-20 20:04:29 +00:00
|
|
|
|
PT_SET_MA(CADDR3, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
pagezero(CADDR3);
|
|
|
|
|
PT_SET_MA(CADDR3, 0);
|
|
|
|
|
sched_unpin();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* pmap_copy_page copies the specified (machine independent)
|
|
|
|
|
* page by mapping the page into virtual memory and using
|
|
|
|
|
* bcopy to copy the page, one machine dependent page at a
|
|
|
|
|
* time.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_copy_page(vm_page_t src, vm_page_t dst)
|
|
|
|
|
{
|
|
|
|
|
struct sysmaps *sysmaps;
|
|
|
|
|
|
|
|
|
|
sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
|
|
|
|
|
mtx_lock(&sysmaps->lock);
|
|
|
|
|
if (*sysmaps->CMAP1)
|
|
|
|
|
panic("pmap_copy_page: CMAP1 busy");
|
|
|
|
|
if (*sysmaps->CMAP2)
|
|
|
|
|
panic("pmap_copy_page: CMAP2 busy");
|
|
|
|
|
sched_pin();
|
2010-11-20 20:04:29 +00:00
|
|
|
|
PT_SET_MA(sysmaps->CADDR1, PG_V | VM_PAGE_TO_MACH(src) | PG_A);
|
|
|
|
|
PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(dst) | PG_A | PG_M);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE);
|
|
|
|
|
PT_SET_MA(sysmaps->CADDR1, 0);
|
|
|
|
|
PT_SET_MA(sysmaps->CADDR2, 0);
|
|
|
|
|
sched_unpin();
|
|
|
|
|
mtx_unlock(&sysmaps->lock);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Returns true if the pmap's pv is one of the first
|
|
|
|
|
* 16 pvs linked to from this page. This count may
|
|
|
|
|
* be changed upwards or downwards in the future; it
|
|
|
|
|
* is only necessary that true be returned for a small
|
|
|
|
|
* subset of pmaps for proper page aging.
|
|
|
|
|
*/
|
|
|
|
|
boolean_t
|
|
|
|
|
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
|
|
|
|
|
{
|
|
|
|
|
pv_entry_t pv;
|
|
|
|
|
int loops = 0;
|
2010-06-10 16:56:35 +00:00
|
|
|
|
boolean_t rv;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
2011-08-09 21:01:36 +00:00
|
|
|
|
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
|
2010-06-10 16:56:35 +00:00
|
|
|
|
("pmap_page_exists_quick: page %p is not managed", m));
|
|
|
|
|
rv = FALSE;
|
|
|
|
|
vm_page_lock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
|
|
|
|
|
if (PV_PMAP(pv) == pmap) {
|
2010-06-10 16:56:35 +00:00
|
|
|
|
rv = TRUE;
|
|
|
|
|
break;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
loops++;
|
|
|
|
|
if (loops >= 16)
|
|
|
|
|
break;
|
|
|
|
|
}
|
2010-06-10 16:56:35 +00:00
|
|
|
|
vm_page_unlock_queues();
|
|
|
|
|
return (rv);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* pmap_page_wired_mappings:
|
|
|
|
|
*
|
|
|
|
|
* Return the number of managed mappings to the given physical page
|
|
|
|
|
* that are wired.
|
|
|
|
|
*/
|
|
|
|
|
int
|
|
|
|
|
pmap_page_wired_mappings(vm_page_t m)
|
|
|
|
|
{
|
|
|
|
|
pv_entry_t pv;
|
|
|
|
|
pt_entry_t *pte;
|
|
|
|
|
pmap_t pmap;
|
|
|
|
|
int count;
|
|
|
|
|
|
|
|
|
|
count = 0;
|
2011-08-09 21:01:36 +00:00
|
|
|
|
if ((m->oflags & VPO_UNMANAGED) != 0)
|
2008-08-12 19:48:18 +00:00
|
|
|
|
return (count);
|
2010-05-08 20:34:01 +00:00
|
|
|
|
vm_page_lock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
sched_pin();
|
|
|
|
|
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
|
|
|
|
|
pmap = PV_PMAP(pv);
|
|
|
|
|
PMAP_LOCK(pmap);
|
|
|
|
|
pte = pmap_pte_quick(pmap, pv->pv_va);
|
|
|
|
|
if ((*pte & PG_W) != 0)
|
|
|
|
|
count++;
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
}
|
|
|
|
|
sched_unpin();
|
2010-05-08 20:34:01 +00:00
|
|
|
|
vm_page_unlock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
return (count);
|
|
|
|
|
}
|
|
|
|
|
|
2008-08-15 04:00:44 +00:00
|
|
|
|
/*
|
|
|
|
|
* Returns TRUE if the given page is mapped individually or as part of
|
|
|
|
|
* a 4mpage. Otherwise, returns FALSE.
|
|
|
|
|
*/
|
|
|
|
|
boolean_t
|
|
|
|
|
pmap_page_is_mapped(vm_page_t m)
|
|
|
|
|
{
|
2010-05-08 20:34:01 +00:00
|
|
|
|
boolean_t rv;
|
2008-08-15 04:00:44 +00:00
|
|
|
|
|
2011-08-09 21:01:36 +00:00
|
|
|
|
if ((m->oflags & VPO_UNMANAGED) != 0)
|
2008-08-15 04:00:44 +00:00
|
|
|
|
return (FALSE);
|
2010-05-08 20:34:01 +00:00
|
|
|
|
vm_page_lock_queues();
|
|
|
|
|
rv = !TAILQ_EMPTY(&m->md.pv_list) ||
|
|
|
|
|
!TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list);
|
|
|
|
|
vm_page_unlock_queues();
|
|
|
|
|
return (rv);
|
2008-08-15 04:00:44 +00:00
|
|
|
|
}
|
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
/*
|
|
|
|
|
* Remove all pages from specified address space
|
|
|
|
|
* this aids process exit speeds. Also, this code
|
|
|
|
|
* is special cased for current process only, but
|
|
|
|
|
* can have the more generic (and slightly slower)
|
|
|
|
|
* mode enabled. This is much faster than pmap_remove
|
|
|
|
|
* in the case of running down an entire address space.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_remove_pages(pmap_t pmap)
|
|
|
|
|
{
|
|
|
|
|
pt_entry_t *pte, tpte;
|
|
|
|
|
vm_page_t m, free = NULL;
|
|
|
|
|
pv_entry_t pv;
|
|
|
|
|
struct pv_chunk *pc, *npc;
|
|
|
|
|
int field, idx;
|
|
|
|
|
int32_t bit;
|
|
|
|
|
uint32_t inuse, bitmask;
|
|
|
|
|
int allfree;
|
|
|
|
|
|
|
|
|
|
CTR1(KTR_PMAP, "pmap_remove_pages: pmap=%p", pmap);
|
|
|
|
|
|
|
|
|
|
if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
|
|
|
|
|
printf("warning: pmap_remove_pages called with non-current pmap\n");
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
vm_page_lock_queues();
|
|
|
|
|
KASSERT(pmap_is_current(pmap), ("removing pages from non-current pmap"));
|
|
|
|
|
PMAP_LOCK(pmap);
|
|
|
|
|
sched_pin();
|
|
|
|
|
TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
|
|
|
|
|
allfree = 1;
|
|
|
|
|
for (field = 0; field < _NPCM; field++) {
|
|
|
|
|
inuse = (~(pc->pc_map[field])) & pc_freemask[field];
|
|
|
|
|
while (inuse != 0) {
|
|
|
|
|
bit = bsfl(inuse);
|
|
|
|
|
bitmask = 1UL << bit;
|
|
|
|
|
idx = field * 32 + bit;
|
|
|
|
|
pv = &pc->pc_pventry[idx];
|
|
|
|
|
inuse &= ~bitmask;
|
|
|
|
|
|
|
|
|
|
pte = vtopte(pv->pv_va);
|
|
|
|
|
tpte = *pte ? xpmap_mtop(*pte) : 0;
|
|
|
|
|
|
|
|
|
|
if (tpte == 0) {
|
|
|
|
|
printf(
|
|
|
|
|
"TPTE at %p IS ZERO @ VA %08x\n",
|
|
|
|
|
pte, pv->pv_va);
|
|
|
|
|
panic("bad pte");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* We cannot remove wired pages from a process' mapping at this time
|
|
|
|
|
*/
|
|
|
|
|
if (tpte & PG_W) {
|
|
|
|
|
allfree = 0;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
|
|
|
|
|
KASSERT(m->phys_addr == (tpte & PG_FRAME),
|
|
|
|
|
("vm_page_t %p phys_addr mismatch %016jx %016jx",
|
|
|
|
|
m, (uintmax_t)m->phys_addr,
|
|
|
|
|
(uintmax_t)tpte));
|
|
|
|
|
|
|
|
|
|
KASSERT(m < &vm_page_array[vm_page_array_size],
|
|
|
|
|
("pmap_remove_pages: bad tpte %#jx",
|
|
|
|
|
(uintmax_t)tpte));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PT_CLEAR_VA(pte, FALSE);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Update the vm_page_t clean/reference bits.
|
|
|
|
|
*/
|
|
|
|
|
if (tpte & PG_M)
|
|
|
|
|
vm_page_dirty(m);
|
|
|
|
|
|
|
|
|
|
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
|
|
|
|
|
if (TAILQ_EMPTY(&m->md.pv_list))
|
|
|
|
|
vm_page_flag_clear(m, PG_WRITEABLE);
|
|
|
|
|
|
|
|
|
|
pmap_unuse_pt(pmap, pv->pv_va, &free);
|
|
|
|
|
|
|
|
|
|
/* Mark free */
|
|
|
|
|
PV_STAT(pv_entry_frees++);
|
|
|
|
|
PV_STAT(pv_entry_spare++);
|
|
|
|
|
pv_entry_count--;
|
|
|
|
|
pc->pc_map[field] |= bitmask;
|
|
|
|
|
pmap->pm_stats.resident_count--;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
PT_UPDATES_FLUSH();
|
|
|
|
|
if (allfree) {
|
|
|
|
|
PV_STAT(pv_entry_spare -= _NPCPV);
|
|
|
|
|
PV_STAT(pc_chunk_count--);
|
|
|
|
|
PV_STAT(pc_chunk_frees++);
|
|
|
|
|
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
|
|
|
|
|
m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
|
|
|
|
|
pmap_qremove((vm_offset_t)pc, 1);
|
|
|
|
|
vm_page_unwire(m, 0);
|
|
|
|
|
vm_page_free(m);
|
|
|
|
|
pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
PT_UPDATES_FLUSH();
|
|
|
|
|
if (*PMAP1)
|
|
|
|
|
PT_SET_MA(PADDR1, 0);
|
|
|
|
|
|
|
|
|
|
sched_unpin();
|
|
|
|
|
pmap_invalidate_all(pmap);
|
|
|
|
|
vm_page_unlock_queues();
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
pmap_free_zero_pages(free);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* pmap_is_modified:
|
|
|
|
|
*
|
|
|
|
|
* Return whether or not the specified physical page was modified
|
|
|
|
|
* in any physical maps.
|
|
|
|
|
*/
|
|
|
|
|
boolean_t
|
|
|
|
|
pmap_is_modified(vm_page_t m)
|
|
|
|
|
{
|
|
|
|
|
pv_entry_t pv;
|
|
|
|
|
pt_entry_t *pte;
|
|
|
|
|
pmap_t pmap;
|
|
|
|
|
boolean_t rv;
|
|
|
|
|
|
2011-08-09 21:01:36 +00:00
|
|
|
|
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
|
Roughly half of a typical pmap_mincore() implementation is machine-
independent code. Move this code into mincore(), and eliminate the
page queues lock from pmap_mincore().
Push down the page queues lock into pmap_clear_modify(),
pmap_clear_reference(), and pmap_is_modified(). Assert that these
functions are never passed an unmanaged page.
Eliminate an inaccurate comment from powerpc/powerpc/mmu_if.m:
Contrary to what the comment says, pmap_mincore() is not simply an
optimization. Without a complete pmap_mincore() implementation,
mincore() cannot return either MINCORE_MODIFIED or MINCORE_REFERENCED
because only the pmap can provide this information.
Eliminate the page queues lock from vfs_setdirty_locked_object(),
vm_pageout_clean(), vm_object_page_collect_flush(), and
vm_object_page_clean(). Generally speaking, these are all accesses
to the page's dirty field, which are synchronized by the containing
vm object's lock.
Reduce the scope of the page queues lock in vm_object_madvise() and
vm_page_dontneed().
Reviewed by: kib (an earlier version)
2010-05-24 14:26:57 +00:00
|
|
|
|
("pmap_is_modified: page %p is not managed", m));
|
2008-08-12 19:48:18 +00:00
|
|
|
|
rv = FALSE;
|
|
|
|
|
|
Roughly half of a typical pmap_mincore() implementation is machine-
independent code. Move this code into mincore(), and eliminate the
page queues lock from pmap_mincore().
Push down the page queues lock into pmap_clear_modify(),
pmap_clear_reference(), and pmap_is_modified(). Assert that these
functions are never passed an unmanaged page.
Eliminate an inaccurate comment from powerpc/powerpc/mmu_if.m:
Contrary to what the comment says, pmap_mincore() is not simply an
optimization. Without a complete pmap_mincore() implementation,
mincore() cannot return either MINCORE_MODIFIED or MINCORE_REFERENCED
because only the pmap can provide this information.
Eliminate the page queues lock from vfs_setdirty_locked_object(),
vm_pageout_clean(), vm_object_page_collect_flush(), and
vm_object_page_clean(). Generally speaking, these are all accesses
to the page's dirty field, which are synchronized by the containing
vm object's lock.
Reduce the scope of the page queues lock in vm_object_madvise() and
vm_page_dontneed().
Reviewed by: kib (an earlier version)
2010-05-24 14:26:57 +00:00
|
|
|
|
/*
|
|
|
|
|
* If the page is not VPO_BUSY, then PG_WRITEABLE cannot be
|
|
|
|
|
* concurrently set while the object is locked. Thus, if PG_WRITEABLE
|
|
|
|
|
* is clear, no PTEs can have PG_M set.
|
|
|
|
|
*/
|
|
|
|
|
VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
|
|
|
|
|
if ((m->oflags & VPO_BUSY) == 0 &&
|
|
|
|
|
(m->flags & PG_WRITEABLE) == 0)
|
|
|
|
|
return (rv);
|
|
|
|
|
vm_page_lock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
sched_pin();
|
|
|
|
|
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
|
|
|
|
|
pmap = PV_PMAP(pv);
|
|
|
|
|
PMAP_LOCK(pmap);
|
|
|
|
|
pte = pmap_pte_quick(pmap, pv->pv_va);
|
|
|
|
|
rv = (*pte & PG_M) != 0;
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
if (rv)
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (*PMAP1)
|
|
|
|
|
PT_SET_MA(PADDR1, 0);
|
|
|
|
|
sched_unpin();
|
Roughly half of a typical pmap_mincore() implementation is machine-
independent code. Move this code into mincore(), and eliminate the
page queues lock from pmap_mincore().
Push down the page queues lock into pmap_clear_modify(),
pmap_clear_reference(), and pmap_is_modified(). Assert that these
functions are never passed an unmanaged page.
Eliminate an inaccurate comment from powerpc/powerpc/mmu_if.m:
Contrary to what the comment says, pmap_mincore() is not simply an
optimization. Without a complete pmap_mincore() implementation,
mincore() cannot return either MINCORE_MODIFIED or MINCORE_REFERENCED
because only the pmap can provide this information.
Eliminate the page queues lock from vfs_setdirty_locked_object(),
vm_pageout_clean(), vm_object_page_collect_flush(), and
vm_object_page_clean(). Generally speaking, these are all accesses
to the page's dirty field, which are synchronized by the containing
vm object's lock.
Reduce the scope of the page queues lock in vm_object_madvise() and
vm_page_dontneed().
Reviewed by: kib (an earlier version)
2010-05-24 14:26:57 +00:00
|
|
|
|
vm_page_unlock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
return (rv);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* pmap_is_prefaultable:
|
|
|
|
|
*
|
|
|
|
|
* Return whether or not the specified virtual address is eligible
|
|
|
|
|
* for prefault.
|
|
|
|
|
*/
|
|
|
|
|
static boolean_t
|
|
|
|
|
pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr)
|
|
|
|
|
{
|
|
|
|
|
pt_entry_t *pte;
|
|
|
|
|
boolean_t rv = FALSE;
|
|
|
|
|
|
|
|
|
|
return (rv);
|
|
|
|
|
|
|
|
|
|
if (pmap_is_current(pmap) && *pmap_pde(pmap, addr)) {
|
|
|
|
|
pte = vtopte(addr);
|
|
|
|
|
rv = (*pte == 0);
|
|
|
|
|
}
|
|
|
|
|
return (rv);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
boolean_t
|
|
|
|
|
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
|
|
|
|
|
{
|
|
|
|
|
boolean_t rv;
|
|
|
|
|
|
|
|
|
|
PMAP_LOCK(pmap);
|
|
|
|
|
rv = pmap_is_prefaultable_locked(pmap, addr);
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
return (rv);
|
|
|
|
|
}
|
|
|
|
|
|
Resurrect pmap_is_referenced() and use it in mincore(). Essentially,
pmap_ts_referenced() is not always appropriate for checking whether or
not pages have been referenced because it clears any reference bits
that it encounters. For example, in mincore(), clearing the reference
bits has two negative consequences. First, it throws off the activity
count calculations performed by the page daemon. Specifically, a page
on which mincore() has called pmap_ts_referenced() looks less active
to the page daemon than it should. Consequently, the page could be
deactivated prematurely by the page daemon. Arguably, this problem
could be fixed by having mincore() duplicate the activity count
calculation on the page. However, there is a second problem for which
that is not a solution. In order to clear a reference on a 4KB page,
it may be necessary to demote a 2/4MB page mapping. Thus, a mincore()
by one process can have the side effect of demoting a superpage
mapping within another process!
2010-04-24 17:32:52 +00:00
|
|
|
|
boolean_t
|
|
|
|
|
pmap_is_referenced(vm_page_t m)
|
|
|
|
|
{
|
|
|
|
|
pv_entry_t pv;
|
|
|
|
|
pt_entry_t *pte;
|
|
|
|
|
pmap_t pmap;
|
|
|
|
|
boolean_t rv;
|
|
|
|
|
|
2011-08-09 21:01:36 +00:00
|
|
|
|
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
|
2010-05-26 18:00:44 +00:00
|
|
|
|
("pmap_is_referenced: page %p is not managed", m));
|
Resurrect pmap_is_referenced() and use it in mincore(). Essentially,
pmap_ts_referenced() is not always appropriate for checking whether or
not pages have been referenced because it clears any reference bits
that it encounters. For example, in mincore(), clearing the reference
bits has two negative consequences. First, it throws off the activity
count calculations performed by the page daemon. Specifically, a page
on which mincore() has called pmap_ts_referenced() looks less active
to the page daemon than it should. Consequently, the page could be
deactivated prematurely by the page daemon. Arguably, this problem
could be fixed by having mincore() duplicate the activity count
calculation on the page. However, there is a second problem for which
that is not a solution. In order to clear a reference on a 4KB page,
it may be necessary to demote a 2/4MB page mapping. Thus, a mincore()
by one process can have the side effect of demoting a superpage
mapping within another process!
2010-04-24 17:32:52 +00:00
|
|
|
|
rv = FALSE;
|
2010-05-26 18:00:44 +00:00
|
|
|
|
vm_page_lock_queues();
|
Resurrect pmap_is_referenced() and use it in mincore(). Essentially,
pmap_ts_referenced() is not always appropriate for checking whether or
not pages have been referenced because it clears any reference bits
that it encounters. For example, in mincore(), clearing the reference
bits has two negative consequences. First, it throws off the activity
count calculations performed by the page daemon. Specifically, a page
on which mincore() has called pmap_ts_referenced() looks less active
to the page daemon than it should. Consequently, the page could be
deactivated prematurely by the page daemon. Arguably, this problem
could be fixed by having mincore() duplicate the activity count
calculation on the page. However, there is a second problem for which
that is not a solution. In order to clear a reference on a 4KB page,
it may be necessary to demote a 2/4MB page mapping. Thus, a mincore()
by one process can have the side effect of demoting a superpage
mapping within another process!
2010-04-24 17:32:52 +00:00
|
|
|
|
sched_pin();
|
|
|
|
|
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
|
|
|
|
|
pmap = PV_PMAP(pv);
|
|
|
|
|
PMAP_LOCK(pmap);
|
|
|
|
|
pte = pmap_pte_quick(pmap, pv->pv_va);
|
|
|
|
|
rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V);
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
if (rv)
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (*PMAP1)
|
|
|
|
|
PT_SET_MA(PADDR1, 0);
|
|
|
|
|
sched_unpin();
|
2010-05-26 18:00:44 +00:00
|
|
|
|
vm_page_unlock_queues();
|
Resurrect pmap_is_referenced() and use it in mincore(). Essentially,
pmap_ts_referenced() is not always appropriate for checking whether or
not pages have been referenced because it clears any reference bits
that it encounters. For example, in mincore(), clearing the reference
bits has two negative consequences. First, it throws off the activity
count calculations performed by the page daemon. Specifically, a page
on which mincore() has called pmap_ts_referenced() looks less active
to the page daemon than it should. Consequently, the page could be
deactivated prematurely by the page daemon. Arguably, this problem
could be fixed by having mincore() duplicate the activity count
calculation on the page. However, there is a second problem for which
that is not a solution. In order to clear a reference on a 4KB page,
it may be necessary to demote a 2/4MB page mapping. Thus, a mincore()
by one process can have the side effect of demoting a superpage
mapping within another process!
2010-04-24 17:32:52 +00:00
|
|
|
|
return (rv);
|
|
|
|
|
}
|
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
void
|
|
|
|
|
pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len)
|
|
|
|
|
{
|
|
|
|
|
int i, npages = round_page(len) >> PAGE_SHIFT;
|
|
|
|
|
for (i = 0; i < npages; i++) {
|
|
|
|
|
pt_entry_t *pte;
|
|
|
|
|
pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE));
|
2010-12-31 17:39:31 +00:00
|
|
|
|
vm_page_lock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
pte_store(pte, xpmap_mtop(*pte & ~(PG_RW|PG_M)));
|
2010-12-31 17:39:31 +00:00
|
|
|
|
vm_page_unlock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
PMAP_MARK_PRIV(xpmap_mtop(*pte));
|
|
|
|
|
pmap_pte_release(pte);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len)
|
|
|
|
|
{
|
|
|
|
|
int i, npages = round_page(len) >> PAGE_SHIFT;
|
|
|
|
|
for (i = 0; i < npages; i++) {
|
|
|
|
|
pt_entry_t *pte;
|
|
|
|
|
pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE));
|
|
|
|
|
PMAP_MARK_UNPRIV(xpmap_mtop(*pte));
|
2010-12-31 17:39:31 +00:00
|
|
|
|
vm_page_lock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
pte_store(pte, xpmap_mtop(*pte) | (PG_RW|PG_M));
|
2010-12-31 17:39:31 +00:00
|
|
|
|
vm_page_unlock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
pmap_pte_release(pte);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Clear the write and modified bits in each of the given page's mappings.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_remove_write(vm_page_t m)
|
|
|
|
|
{
|
|
|
|
|
pv_entry_t pv;
|
|
|
|
|
pmap_t pmap;
|
|
|
|
|
pt_entry_t oldpte, *pte;
|
|
|
|
|
|
2011-08-09 21:01:36 +00:00
|
|
|
|
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
|
2010-05-16 23:45:10 +00:00
|
|
|
|
("pmap_remove_write: page %p is not managed", m));
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by
|
|
|
|
|
* another thread while the object is locked. Thus, if PG_WRITEABLE
|
|
|
|
|
* is clear, no page table entries need updating.
|
|
|
|
|
*/
|
|
|
|
|
VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
|
|
|
|
|
if ((m->oflags & VPO_BUSY) == 0 &&
|
2008-08-12 19:48:18 +00:00
|
|
|
|
(m->flags & PG_WRITEABLE) == 0)
|
|
|
|
|
return;
|
2010-05-08 20:34:01 +00:00
|
|
|
|
vm_page_lock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
sched_pin();
|
|
|
|
|
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
|
|
|
|
|
pmap = PV_PMAP(pv);
|
|
|
|
|
PMAP_LOCK(pmap);
|
|
|
|
|
pte = pmap_pte_quick(pmap, pv->pv_va);
|
|
|
|
|
retry:
|
|
|
|
|
oldpte = *pte;
|
|
|
|
|
if ((oldpte & PG_RW) != 0) {
|
2009-02-08 21:54:51 +00:00
|
|
|
|
vm_paddr_t newpte = oldpte & ~(PG_RW | PG_M);
|
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
/*
|
|
|
|
|
* Regardless of whether a pte is 32 or 64 bits
|
|
|
|
|
* in size, PG_RW and PG_M are among the least
|
|
|
|
|
* significant 32 bits.
|
|
|
|
|
*/
|
2009-02-08 21:54:51 +00:00
|
|
|
|
PT_SET_VA_MA(pte, newpte, TRUE);
|
|
|
|
|
if (*pte != newpte)
|
2008-08-12 19:48:18 +00:00
|
|
|
|
goto retry;
|
2009-02-08 21:54:51 +00:00
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
if ((oldpte & PG_M) != 0)
|
|
|
|
|
vm_page_dirty(m);
|
|
|
|
|
pmap_invalidate_page(pmap, pv->pv_va);
|
|
|
|
|
}
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
}
|
|
|
|
|
vm_page_flag_clear(m, PG_WRITEABLE);
|
|
|
|
|
PT_UPDATES_FLUSH();
|
|
|
|
|
if (*PMAP1)
|
|
|
|
|
PT_SET_MA(PADDR1, 0);
|
|
|
|
|
sched_unpin();
|
2010-05-08 20:34:01 +00:00
|
|
|
|
vm_page_unlock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* pmap_ts_referenced:
|
|
|
|
|
*
|
|
|
|
|
* Return a count of reference bits for a page, clearing those bits.
|
|
|
|
|
* It is not necessary for every reference bit to be cleared, but it
|
|
|
|
|
* is necessary that 0 only be returned when there are truly no
|
|
|
|
|
* reference bits set.
|
|
|
|
|
*
|
|
|
|
|
* XXX: The exact number of bits to check and clear is a matter that
|
|
|
|
|
* should be tested and standardized at some point in the future for
|
|
|
|
|
* optimal aging of shared pages.
|
|
|
|
|
*/
|
|
|
|
|
int
|
|
|
|
|
pmap_ts_referenced(vm_page_t m)
|
|
|
|
|
{
|
|
|
|
|
pv_entry_t pv, pvf, pvn;
|
|
|
|
|
pmap_t pmap;
|
|
|
|
|
pt_entry_t *pte;
|
|
|
|
|
int rtval = 0;
|
|
|
|
|
|
2011-08-09 21:01:36 +00:00
|
|
|
|
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
|
2010-06-10 16:56:35 +00:00
|
|
|
|
("pmap_ts_referenced: page %p is not managed", m));
|
|
|
|
|
vm_page_lock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
sched_pin();
|
|
|
|
|
if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
|
|
|
|
|
pvf = pv;
|
|
|
|
|
do {
|
|
|
|
|
pvn = TAILQ_NEXT(pv, pv_list);
|
|
|
|
|
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
|
|
|
|
|
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
|
|
|
|
|
pmap = PV_PMAP(pv);
|
|
|
|
|
PMAP_LOCK(pmap);
|
|
|
|
|
pte = pmap_pte_quick(pmap, pv->pv_va);
|
|
|
|
|
if ((*pte & PG_A) != 0) {
|
|
|
|
|
PT_SET_VA_MA(pte, *pte & ~PG_A, FALSE);
|
|
|
|
|
pmap_invalidate_page(pmap, pv->pv_va);
|
|
|
|
|
rtval++;
|
|
|
|
|
if (rtval > 4)
|
|
|
|
|
pvn = NULL;
|
|
|
|
|
}
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
} while ((pv = pvn) != NULL && pv != pvf);
|
|
|
|
|
}
|
|
|
|
|
PT_UPDATES_FLUSH();
|
|
|
|
|
if (*PMAP1)
|
|
|
|
|
PT_SET_MA(PADDR1, 0);
|
|
|
|
|
|
|
|
|
|
sched_unpin();
|
2010-06-10 16:56:35 +00:00
|
|
|
|
vm_page_unlock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
return (rtval);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Clear the modify bits on the specified physical page.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_clear_modify(vm_page_t m)
|
|
|
|
|
{
|
|
|
|
|
pv_entry_t pv;
|
|
|
|
|
pmap_t pmap;
|
|
|
|
|
pt_entry_t *pte;
|
|
|
|
|
|
2011-08-09 21:01:36 +00:00
|
|
|
|
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
|
Roughly half of a typical pmap_mincore() implementation is machine-
independent code. Move this code into mincore(), and eliminate the
page queues lock from pmap_mincore().
Push down the page queues lock into pmap_clear_modify(),
pmap_clear_reference(), and pmap_is_modified(). Assert that these
functions are never passed an unmanaged page.
Eliminate an inaccurate comment from powerpc/powerpc/mmu_if.m:
Contrary to what the comment says, pmap_mincore() is not simply an
optimization. Without a complete pmap_mincore() implementation,
mincore() cannot return either MINCORE_MODIFIED or MINCORE_REFERENCED
because only the pmap can provide this information.
Eliminate the page queues lock from vfs_setdirty_locked_object(),
vm_pageout_clean(), vm_object_page_collect_flush(), and
vm_object_page_clean(). Generally speaking, these are all accesses
to the page's dirty field, which are synchronized by the containing
vm object's lock.
Reduce the scope of the page queues lock in vm_object_madvise() and
vm_page_dontneed().
Reviewed by: kib (an earlier version)
2010-05-24 14:26:57 +00:00
|
|
|
|
("pmap_clear_modify: page %p is not managed", m));
|
|
|
|
|
VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
|
|
|
|
|
KASSERT((m->oflags & VPO_BUSY) == 0,
|
|
|
|
|
("pmap_clear_modify: page %p is busy", m));
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If the page is not PG_WRITEABLE, then no PTEs can have PG_M set.
|
|
|
|
|
* If the object containing the page is locked and the page is not
|
|
|
|
|
* VPO_BUSY, then PG_WRITEABLE cannot be concurrently set.
|
|
|
|
|
*/
|
|
|
|
|
if ((m->flags & PG_WRITEABLE) == 0)
|
2008-08-12 19:48:18 +00:00
|
|
|
|
return;
|
Roughly half of a typical pmap_mincore() implementation is machine-
independent code. Move this code into mincore(), and eliminate the
page queues lock from pmap_mincore().
Push down the page queues lock into pmap_clear_modify(),
pmap_clear_reference(), and pmap_is_modified(). Assert that these
functions are never passed an unmanaged page.
Eliminate an inaccurate comment from powerpc/powerpc/mmu_if.m:
Contrary to what the comment says, pmap_mincore() is not simply an
optimization. Without a complete pmap_mincore() implementation,
mincore() cannot return either MINCORE_MODIFIED or MINCORE_REFERENCED
because only the pmap can provide this information.
Eliminate the page queues lock from vfs_setdirty_locked_object(),
vm_pageout_clean(), vm_object_page_collect_flush(), and
vm_object_page_clean(). Generally speaking, these are all accesses
to the page's dirty field, which are synchronized by the containing
vm object's lock.
Reduce the scope of the page queues lock in vm_object_madvise() and
vm_page_dontneed().
Reviewed by: kib (an earlier version)
2010-05-24 14:26:57 +00:00
|
|
|
|
vm_page_lock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
sched_pin();
|
|
|
|
|
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
|
|
|
|
|
pmap = PV_PMAP(pv);
|
|
|
|
|
PMAP_LOCK(pmap);
|
|
|
|
|
pte = pmap_pte_quick(pmap, pv->pv_va);
|
|
|
|
|
if ((*pte & PG_M) != 0) {
|
|
|
|
|
/*
|
|
|
|
|
* Regardless of whether a pte is 32 or 64 bits
|
|
|
|
|
* in size, PG_M is among the least significant
|
|
|
|
|
* 32 bits.
|
|
|
|
|
*/
|
|
|
|
|
PT_SET_VA_MA(pte, *pte & ~PG_M, FALSE);
|
|
|
|
|
pmap_invalidate_page(pmap, pv->pv_va);
|
|
|
|
|
}
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
}
|
|
|
|
|
sched_unpin();
|
Roughly half of a typical pmap_mincore() implementation is machine-
independent code. Move this code into mincore(), and eliminate the
page queues lock from pmap_mincore().
Push down the page queues lock into pmap_clear_modify(),
pmap_clear_reference(), and pmap_is_modified(). Assert that these
functions are never passed an unmanaged page.
Eliminate an inaccurate comment from powerpc/powerpc/mmu_if.m:
Contrary to what the comment says, pmap_mincore() is not simply an
optimization. Without a complete pmap_mincore() implementation,
mincore() cannot return either MINCORE_MODIFIED or MINCORE_REFERENCED
because only the pmap can provide this information.
Eliminate the page queues lock from vfs_setdirty_locked_object(),
vm_pageout_clean(), vm_object_page_collect_flush(), and
vm_object_page_clean(). Generally speaking, these are all accesses
to the page's dirty field, which are synchronized by the containing
vm object's lock.
Reduce the scope of the page queues lock in vm_object_madvise() and
vm_page_dontneed().
Reviewed by: kib (an earlier version)
2010-05-24 14:26:57 +00:00
|
|
|
|
vm_page_unlock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* pmap_clear_reference:
|
|
|
|
|
*
|
|
|
|
|
* Clear the reference bit on the specified physical page.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_clear_reference(vm_page_t m)
|
|
|
|
|
{
|
|
|
|
|
pv_entry_t pv;
|
|
|
|
|
pmap_t pmap;
|
|
|
|
|
pt_entry_t *pte;
|
|
|
|
|
|
2011-08-09 21:01:36 +00:00
|
|
|
|
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
|
Roughly half of a typical pmap_mincore() implementation is machine-
independent code. Move this code into mincore(), and eliminate the
page queues lock from pmap_mincore().
Push down the page queues lock into pmap_clear_modify(),
pmap_clear_reference(), and pmap_is_modified(). Assert that these
functions are never passed an unmanaged page.
Eliminate an inaccurate comment from powerpc/powerpc/mmu_if.m:
Contrary to what the comment says, pmap_mincore() is not simply an
optimization. Without a complete pmap_mincore() implementation,
mincore() cannot return either MINCORE_MODIFIED or MINCORE_REFERENCED
because only the pmap can provide this information.
Eliminate the page queues lock from vfs_setdirty_locked_object(),
vm_pageout_clean(), vm_object_page_collect_flush(), and
vm_object_page_clean(). Generally speaking, these are all accesses
to the page's dirty field, which are synchronized by the containing
vm object's lock.
Reduce the scope of the page queues lock in vm_object_madvise() and
vm_page_dontneed().
Reviewed by: kib (an earlier version)
2010-05-24 14:26:57 +00:00
|
|
|
|
("pmap_clear_reference: page %p is not managed", m));
|
|
|
|
|
vm_page_lock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
sched_pin();
|
|
|
|
|
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
|
|
|
|
|
pmap = PV_PMAP(pv);
|
|
|
|
|
PMAP_LOCK(pmap);
|
|
|
|
|
pte = pmap_pte_quick(pmap, pv->pv_va);
|
|
|
|
|
if ((*pte & PG_A) != 0) {
|
|
|
|
|
/*
|
|
|
|
|
* Regardless of whether a pte is 32 or 64 bits
|
|
|
|
|
* in size, PG_A is among the least significant
|
|
|
|
|
* 32 bits.
|
|
|
|
|
*/
|
|
|
|
|
PT_SET_VA_MA(pte, *pte & ~PG_A, FALSE);
|
|
|
|
|
pmap_invalidate_page(pmap, pv->pv_va);
|
|
|
|
|
}
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
}
|
|
|
|
|
sched_unpin();
|
Roughly half of a typical pmap_mincore() implementation is machine-
independent code. Move this code into mincore(), and eliminate the
page queues lock from pmap_mincore().
Push down the page queues lock into pmap_clear_modify(),
pmap_clear_reference(), and pmap_is_modified(). Assert that these
functions are never passed an unmanaged page.
Eliminate an inaccurate comment from powerpc/powerpc/mmu_if.m:
Contrary to what the comment says, pmap_mincore() is not simply an
optimization. Without a complete pmap_mincore() implementation,
mincore() cannot return either MINCORE_MODIFIED or MINCORE_REFERENCED
because only the pmap can provide this information.
Eliminate the page queues lock from vfs_setdirty_locked_object(),
vm_pageout_clean(), vm_object_page_collect_flush(), and
vm_object_page_clean(). Generally speaking, these are all accesses
to the page's dirty field, which are synchronized by the containing
vm object's lock.
Reduce the scope of the page queues lock in vm_object_madvise() and
vm_page_dontneed().
Reviewed by: kib (an earlier version)
2010-05-24 14:26:57 +00:00
|
|
|
|
vm_page_unlock_queues();
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Miscellaneous support routines follow
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Map a set of physical memory pages into the kernel virtual
|
|
|
|
|
* address space. Return a pointer to where it is mapped. This
|
|
|
|
|
* routine is intended to be used for mapping device memory,
|
|
|
|
|
* NOT real memory.
|
|
|
|
|
*/
|
|
|
|
|
void *
|
|
|
|
|
pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
|
|
|
|
|
{
|
2009-07-29 19:38:33 +00:00
|
|
|
|
vm_offset_t va, offset;
|
|
|
|
|
vm_size_t tmpsize;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
|
|
|
|
offset = pa & PAGE_MASK;
|
|
|
|
|
size = roundup(offset + size, PAGE_SIZE);
|
|
|
|
|
pa = pa & PG_FRAME;
|
|
|
|
|
|
|
|
|
|
if (pa < KERNLOAD && pa + size <= KERNLOAD)
|
|
|
|
|
va = KERNBASE + pa;
|
|
|
|
|
else
|
|
|
|
|
va = kmem_alloc_nofault(kernel_map, size);
|
|
|
|
|
if (!va)
|
|
|
|
|
panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
|
|
|
|
|
|
2009-07-29 19:38:33 +00:00
|
|
|
|
for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
|
|
|
|
|
pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
|
|
|
|
|
pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
|
|
|
|
|
pmap_invalidate_cache_range(va, va + size);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
return ((void *)(va + offset));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void *
|
|
|
|
|
pmap_mapdev(vm_paddr_t pa, vm_size_t size)
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void *
|
|
|
|
|
pmap_mapbios(vm_paddr_t pa, vm_size_t size)
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
pmap_unmapdev(vm_offset_t va, vm_size_t size)
|
|
|
|
|
{
|
|
|
|
|
vm_offset_t base, offset, tmpva;
|
|
|
|
|
|
|
|
|
|
if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
|
|
|
|
|
return;
|
|
|
|
|
base = trunc_page(va);
|
|
|
|
|
offset = va & PAGE_MASK;
|
|
|
|
|
size = roundup(offset + size, PAGE_SIZE);
|
|
|
|
|
critical_enter();
|
|
|
|
|
for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE)
|
|
|
|
|
pmap_kremove(tmpva);
|
|
|
|
|
pmap_invalidate_range(kernel_pmap, va, tmpva);
|
|
|
|
|
critical_exit();
|
|
|
|
|
kmem_free(kernel_map, base, size);
|
|
|
|
|
}
|
|
|
|
|
|
2009-07-19 21:40:19 +00:00
|
|
|
|
/*
|
|
|
|
|
* Sets the memory attribute for the specified page.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
|
|
|
|
|
{
|
2009-07-29 19:38:33 +00:00
|
|
|
|
struct sysmaps *sysmaps;
|
|
|
|
|
vm_offset_t sva, eva;
|
2009-07-19 21:40:19 +00:00
|
|
|
|
|
|
|
|
|
m->md.pat_mode = ma;
|
2009-07-29 19:38:33 +00:00
|
|
|
|
if ((m->flags & PG_FICTITIOUS) != 0)
|
|
|
|
|
return;
|
2009-07-19 21:40:19 +00:00
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If "m" is a normal page, flush it from the cache.
|
2009-07-29 19:38:33 +00:00
|
|
|
|
* See pmap_invalidate_cache_range().
|
|
|
|
|
*
|
|
|
|
|
* First, try to find an existing mapping of the page by sf
|
|
|
|
|
* buffer. sf_buf_invalidate_cache() modifies mapping and
|
|
|
|
|
* flushes the cache.
|
2009-07-19 21:40:19 +00:00
|
|
|
|
*/
|
2009-07-29 19:38:33 +00:00
|
|
|
|
if (sf_buf_invalidate_cache(m))
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If page is not mapped by sf buffer, but CPU does not
|
|
|
|
|
* support self snoop, map the page transient and do
|
|
|
|
|
* invalidation. In the worst case, whole cache is flushed by
|
|
|
|
|
* pmap_invalidate_cache_range().
|
|
|
|
|
*/
|
|
|
|
|
if ((cpu_feature & (CPUID_SS|CPUID_CLFSH)) == CPUID_CLFSH) {
|
|
|
|
|
sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
|
|
|
|
|
mtx_lock(&sysmaps->lock);
|
|
|
|
|
if (*sysmaps->CMAP2)
|
|
|
|
|
panic("pmap_page_set_memattr: CMAP2 busy");
|
|
|
|
|
sched_pin();
|
|
|
|
|
PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW |
|
2010-11-20 20:04:29 +00:00
|
|
|
|
VM_PAGE_TO_MACH(m) | PG_A | PG_M |
|
2009-07-29 19:38:33 +00:00
|
|
|
|
pmap_cache_bits(m->md.pat_mode, 0));
|
|
|
|
|
invlcaddr(sysmaps->CADDR2);
|
|
|
|
|
sva = (vm_offset_t)sysmaps->CADDR2;
|
|
|
|
|
eva = sva + PAGE_SIZE;
|
|
|
|
|
} else
|
|
|
|
|
sva = eva = 0; /* gcc */
|
|
|
|
|
pmap_invalidate_cache_range(sva, eva);
|
|
|
|
|
if (sva != 0) {
|
|
|
|
|
PT_SET_MA(sysmaps->CADDR2, 0);
|
|
|
|
|
sched_unpin();
|
|
|
|
|
mtx_unlock(&sysmaps->lock);
|
2009-07-19 21:40:19 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
int
|
|
|
|
|
pmap_change_attr(va, size, mode)
|
|
|
|
|
vm_offset_t va;
|
|
|
|
|
vm_size_t size;
|
|
|
|
|
int mode;
|
|
|
|
|
{
|
|
|
|
|
vm_offset_t base, offset, tmpva;
|
|
|
|
|
pt_entry_t *pte;
|
|
|
|
|
u_int opte, npte;
|
|
|
|
|
pd_entry_t *pde;
|
2009-07-29 19:38:33 +00:00
|
|
|
|
boolean_t changed;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
|
|
|
|
base = trunc_page(va);
|
|
|
|
|
offset = va & PAGE_MASK;
|
|
|
|
|
size = roundup(offset + size, PAGE_SIZE);
|
|
|
|
|
|
|
|
|
|
/* Only supported on kernel virtual addresses. */
|
|
|
|
|
if (base <= VM_MAXUSER_ADDRESS)
|
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
|
|
/* 4MB pages and pages that aren't mapped aren't supported. */
|
|
|
|
|
for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
|
|
|
|
|
pde = pmap_pde(kernel_pmap, tmpva);
|
|
|
|
|
if (*pde & PG_PS)
|
|
|
|
|
return (EINVAL);
|
|
|
|
|
if ((*pde & PG_V) == 0)
|
|
|
|
|
return (EINVAL);
|
|
|
|
|
pte = vtopte(va);
|
|
|
|
|
if ((*pte & PG_V) == 0)
|
|
|
|
|
return (EINVAL);
|
|
|
|
|
}
|
|
|
|
|
|
2009-07-29 19:38:33 +00:00
|
|
|
|
changed = FALSE;
|
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
/*
|
|
|
|
|
* Ok, all the pages exist and are 4k, so run through them updating
|
|
|
|
|
* their cache mode.
|
|
|
|
|
*/
|
|
|
|
|
for (tmpva = base; size > 0; ) {
|
|
|
|
|
pte = vtopte(tmpva);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* The cache mode bits are all in the low 32-bits of the
|
|
|
|
|
* PTE, so we can just spin on updating the low 32-bits.
|
|
|
|
|
*/
|
|
|
|
|
do {
|
|
|
|
|
opte = *(u_int *)pte;
|
|
|
|
|
npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT);
|
|
|
|
|
npte |= pmap_cache_bits(mode, 0);
|
|
|
|
|
PT_SET_VA_MA(pte, npte, TRUE);
|
|
|
|
|
} while (npte != opte && (*pte != npte));
|
2009-07-29 19:38:33 +00:00
|
|
|
|
if (npte != opte)
|
|
|
|
|
changed = TRUE;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
tmpva += PAGE_SIZE;
|
|
|
|
|
size -= PAGE_SIZE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Flush CPU caches to make sure any data isn't cached that shouldn't
|
|
|
|
|
* be, etc.
|
|
|
|
|
*/
|
2009-07-29 19:38:33 +00:00
|
|
|
|
if (changed) {
|
|
|
|
|
pmap_invalidate_range(kernel_pmap, base, tmpva);
|
|
|
|
|
pmap_invalidate_cache_range(base, tmpva);
|
|
|
|
|
}
|
2008-08-12 19:48:18 +00:00
|
|
|
|
return (0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* perform the pmap work for mincore
|
|
|
|
|
*/
|
|
|
|
|
int
|
Roughly half of a typical pmap_mincore() implementation is machine-
independent code. Move this code into mincore(), and eliminate the
page queues lock from pmap_mincore().
Push down the page queues lock into pmap_clear_modify(),
pmap_clear_reference(), and pmap_is_modified(). Assert that these
functions are never passed an unmanaged page.
Eliminate an inaccurate comment from powerpc/powerpc/mmu_if.m:
Contrary to what the comment says, pmap_mincore() is not simply an
optimization. Without a complete pmap_mincore() implementation,
mincore() cannot return either MINCORE_MODIFIED or MINCORE_REFERENCED
because only the pmap can provide this information.
Eliminate the page queues lock from vfs_setdirty_locked_object(),
vm_pageout_clean(), vm_object_page_collect_flush(), and
vm_object_page_clean(). Generally speaking, these are all accesses
to the page's dirty field, which are synchronized by the containing
vm object's lock.
Reduce the scope of the page queues lock in vm_object_madvise() and
vm_page_dontneed().
Reviewed by: kib (an earlier version)
2010-05-24 14:26:57 +00:00
|
|
|
|
pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
|
2008-08-12 19:48:18 +00:00
|
|
|
|
{
|
|
|
|
|
pt_entry_t *ptep, pte;
|
Roughly half of a typical pmap_mincore() implementation is machine-
independent code. Move this code into mincore(), and eliminate the
page queues lock from pmap_mincore().
Push down the page queues lock into pmap_clear_modify(),
pmap_clear_reference(), and pmap_is_modified(). Assert that these
functions are never passed an unmanaged page.
Eliminate an inaccurate comment from powerpc/powerpc/mmu_if.m:
Contrary to what the comment says, pmap_mincore() is not simply an
optimization. Without a complete pmap_mincore() implementation,
mincore() cannot return either MINCORE_MODIFIED or MINCORE_REFERENCED
because only the pmap can provide this information.
Eliminate the page queues lock from vfs_setdirty_locked_object(),
vm_pageout_clean(), vm_object_page_collect_flush(), and
vm_object_page_clean(). Generally speaking, these are all accesses
to the page's dirty field, which are synchronized by the containing
vm object's lock.
Reduce the scope of the page queues lock in vm_object_madvise() and
vm_page_dontneed().
Reviewed by: kib (an earlier version)
2010-05-24 14:26:57 +00:00
|
|
|
|
vm_paddr_t pa;
|
|
|
|
|
int val;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
|
|
|
|
PMAP_LOCK(pmap);
|
Roughly half of a typical pmap_mincore() implementation is machine-
independent code. Move this code into mincore(), and eliminate the
page queues lock from pmap_mincore().
Push down the page queues lock into pmap_clear_modify(),
pmap_clear_reference(), and pmap_is_modified(). Assert that these
functions are never passed an unmanaged page.
Eliminate an inaccurate comment from powerpc/powerpc/mmu_if.m:
Contrary to what the comment says, pmap_mincore() is not simply an
optimization. Without a complete pmap_mincore() implementation,
mincore() cannot return either MINCORE_MODIFIED or MINCORE_REFERENCED
because only the pmap can provide this information.
Eliminate the page queues lock from vfs_setdirty_locked_object(),
vm_pageout_clean(), vm_object_page_collect_flush(), and
vm_object_page_clean(). Generally speaking, these are all accesses
to the page's dirty field, which are synchronized by the containing
vm object's lock.
Reduce the scope of the page queues lock in vm_object_madvise() and
vm_page_dontneed().
Reviewed by: kib (an earlier version)
2010-05-24 14:26:57 +00:00
|
|
|
|
retry:
|
2008-08-12 19:48:18 +00:00
|
|
|
|
ptep = pmap_pte(pmap, addr);
|
|
|
|
|
pte = (ptep != NULL) ? PT_GET(ptep) : 0;
|
|
|
|
|
pmap_pte_release(ptep);
|
Roughly half of a typical pmap_mincore() implementation is machine-
independent code. Move this code into mincore(), and eliminate the
page queues lock from pmap_mincore().
Push down the page queues lock into pmap_clear_modify(),
pmap_clear_reference(), and pmap_is_modified(). Assert that these
functions are never passed an unmanaged page.
Eliminate an inaccurate comment from powerpc/powerpc/mmu_if.m:
Contrary to what the comment says, pmap_mincore() is not simply an
optimization. Without a complete pmap_mincore() implementation,
mincore() cannot return either MINCORE_MODIFIED or MINCORE_REFERENCED
because only the pmap can provide this information.
Eliminate the page queues lock from vfs_setdirty_locked_object(),
vm_pageout_clean(), vm_object_page_collect_flush(), and
vm_object_page_clean(). Generally speaking, these are all accesses
to the page's dirty field, which are synchronized by the containing
vm object's lock.
Reduce the scope of the page queues lock in vm_object_madvise() and
vm_page_dontneed().
Reviewed by: kib (an earlier version)
2010-05-24 14:26:57 +00:00
|
|
|
|
val = 0;
|
|
|
|
|
if ((pte & PG_V) != 0) {
|
|
|
|
|
val |= MINCORE_INCORE;
|
|
|
|
|
if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW))
|
|
|
|
|
val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
|
|
|
|
|
if ((pte & PG_A) != 0)
|
|
|
|
|
val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
|
|
|
|
|
}
|
|
|
|
|
if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
|
|
|
|
|
(MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
|
|
|
|
|
(pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) {
|
2008-08-12 19:48:18 +00:00
|
|
|
|
pa = pte & PG_FRAME;
|
Roughly half of a typical pmap_mincore() implementation is machine-
independent code. Move this code into mincore(), and eliminate the
page queues lock from pmap_mincore().
Push down the page queues lock into pmap_clear_modify(),
pmap_clear_reference(), and pmap_is_modified(). Assert that these
functions are never passed an unmanaged page.
Eliminate an inaccurate comment from powerpc/powerpc/mmu_if.m:
Contrary to what the comment says, pmap_mincore() is not simply an
optimization. Without a complete pmap_mincore() implementation,
mincore() cannot return either MINCORE_MODIFIED or MINCORE_REFERENCED
because only the pmap can provide this information.
Eliminate the page queues lock from vfs_setdirty_locked_object(),
vm_pageout_clean(), vm_object_page_collect_flush(), and
vm_object_page_clean(). Generally speaking, these are all accesses
to the page's dirty field, which are synchronized by the containing
vm object's lock.
Reduce the scope of the page queues lock in vm_object_madvise() and
vm_page_dontneed().
Reviewed by: kib (an earlier version)
2010-05-24 14:26:57 +00:00
|
|
|
|
/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
|
|
|
|
|
if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
|
|
|
|
|
goto retry;
|
|
|
|
|
} else
|
|
|
|
|
PA_UNLOCK_COND(*locked_pa);
|
|
|
|
|
PMAP_UNLOCK(pmap);
|
|
|
|
|
return (val);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
pmap_activate(struct thread *td)
|
|
|
|
|
{
|
|
|
|
|
pmap_t pmap, oldpmap;
|
2011-06-28 13:13:06 +00:00
|
|
|
|
u_int cpuid;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
u_int32_t cr3;
|
|
|
|
|
|
|
|
|
|
critical_enter();
|
|
|
|
|
pmap = vmspace_pmap(td->td_proc->p_vmspace);
|
|
|
|
|
oldpmap = PCPU_GET(curpmap);
|
2011-06-28 13:13:06 +00:00
|
|
|
|
cpuid = PCPU_GET(cpuid);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
#if defined(SMP)
|
2011-06-28 13:13:06 +00:00
|
|
|
|
CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
|
|
|
|
|
CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
#else
|
2011-06-28 13:13:06 +00:00
|
|
|
|
CPU_CLR(cpuid, &oldpmap->pm_active);
|
|
|
|
|
CPU_SET(cpuid, &pmap->pm_active);
|
2008-08-12 19:48:18 +00:00
|
|
|
|
#endif
|
|
|
|
|
#ifdef PAE
|
|
|
|
|
cr3 = vtophys(pmap->pm_pdpt);
|
|
|
|
|
#else
|
|
|
|
|
cr3 = vtophys(pmap->pm_pdir);
|
|
|
|
|
#endif
|
|
|
|
|
/*
|
|
|
|
|
* pmap_activate is for the current thread on the current cpu
|
|
|
|
|
*/
|
|
|
|
|
td->td_pcb->pcb_cr3 = cr3;
|
|
|
|
|
PT_UPDATES_FLUSH();
|
|
|
|
|
load_cr3(cr3);
|
|
|
|
|
PCPU_SET(curpmap, pmap);
|
|
|
|
|
critical_exit();
|
|
|
|
|
}
|
|
|
|
|
|
2009-10-21 18:38:02 +00:00
|
|
|
|
void
|
|
|
|
|
pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
|
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
|
2008-08-15 04:00:44 +00:00
|
|
|
|
/*
|
|
|
|
|
* Increase the starting virtual address of the given mapping if a
|
|
|
|
|
* different alignment might result in more superpage mappings.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
|
|
|
|
|
vm_offset_t *addr, vm_size_t size)
|
2008-08-12 19:48:18 +00:00
|
|
|
|
{
|
2008-08-15 04:00:44 +00:00
|
|
|
|
vm_offset_t superpage_offset;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
|
2008-08-15 04:00:44 +00:00
|
|
|
|
if (size < NBPDR)
|
|
|
|
|
return;
|
|
|
|
|
if (object != NULL && (object->flags & OBJ_COLORED) != 0)
|
|
|
|
|
offset += ptoa(object->pg_color);
|
|
|
|
|
superpage_offset = offset & PDRMASK;
|
|
|
|
|
if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR ||
|
|
|
|
|
(*addr & PDRMASK) == superpage_offset)
|
|
|
|
|
return;
|
|
|
|
|
if ((*addr & PDRMASK) < superpage_offset)
|
|
|
|
|
*addr = (*addr & ~PDRMASK) + superpage_offset;
|
|
|
|
|
else
|
|
|
|
|
*addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset;
|
2008-08-12 19:48:18 +00:00
|
|
|
|
}
|
|
|
|
|
|
2009-04-01 17:06:28 +00:00
|
|
|
|
void
|
|
|
|
|
pmap_suspend()
|
|
|
|
|
{
|
|
|
|
|
pmap_t pmap;
|
|
|
|
|
int i, pdir, offset;
|
|
|
|
|
vm_paddr_t pdirma;
|
|
|
|
|
mmu_update_t mu[4];
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* We need to remove the recursive mapping structure from all
|
|
|
|
|
* our pmaps so that Xen doesn't get confused when it restores
|
|
|
|
|
* the page tables. The recursive map lives at page directory
|
|
|
|
|
* index PTDPTDI. We assume that the suspend code has stopped
|
|
|
|
|
* the other vcpus (if any).
|
|
|
|
|
*/
|
|
|
|
|
LIST_FOREACH(pmap, &allpmaps, pm_list) {
|
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
|
|
|
/*
|
|
|
|
|
* Figure out which page directory (L2) page
|
|
|
|
|
* contains this bit of the recursive map and
|
|
|
|
|
* the offset within that page of the map
|
|
|
|
|
* entry
|
|
|
|
|
*/
|
|
|
|
|
pdir = (PTDPTDI + i) / NPDEPG;
|
|
|
|
|
offset = (PTDPTDI + i) % NPDEPG;
|
|
|
|
|
pdirma = pmap->pm_pdpt[pdir] & PG_FRAME;
|
|
|
|
|
mu[i].ptr = pdirma + offset * sizeof(pd_entry_t);
|
|
|
|
|
mu[i].val = 0;
|
|
|
|
|
}
|
|
|
|
|
HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
pmap_resume()
|
|
|
|
|
{
|
|
|
|
|
pmap_t pmap;
|
|
|
|
|
int i, pdir, offset;
|
|
|
|
|
vm_paddr_t pdirma;
|
|
|
|
|
mmu_update_t mu[4];
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Restore the recursive map that we removed on suspend.
|
|
|
|
|
*/
|
|
|
|
|
LIST_FOREACH(pmap, &allpmaps, pm_list) {
|
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
|
|
|
/*
|
|
|
|
|
* Figure out which page directory (L2) page
|
|
|
|
|
* contains this bit of the recursive map and
|
|
|
|
|
* the offset within that page of the map
|
|
|
|
|
* entry
|
|
|
|
|
*/
|
|
|
|
|
pdir = (PTDPTDI + i) / NPDEPG;
|
|
|
|
|
offset = (PTDPTDI + i) % NPDEPG;
|
|
|
|
|
pdirma = pmap->pm_pdpt[pdir] & PG_FRAME;
|
|
|
|
|
mu[i].ptr = pdirma + offset * sizeof(pd_entry_t);
|
|
|
|
|
mu[i].val = (pmap->pm_pdpt[i] & PG_FRAME) | PG_V;
|
|
|
|
|
}
|
|
|
|
|
HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2008-08-12 19:48:18 +00:00
|
|
|
|
#if defined(PMAP_DEBUG)
/*
 * Debugging aid: print the valid user-space page-table entries of the
 * process whose pid is "pid", two entries per output line.
 *
 * Returns the number of valid entries that were printed.  The walk
 * stops, and the function returns, as soon as the virtual address
 * reaches VM_MIN_KERNEL_ADDRESS.  The allproc lock is held shared for
 * the duration of the walk.
 *
 * Changes relative to the previous version:
 *  - the return type is spelled out instead of relying on implicit int;
 *  - every pmap_pte() lookup is paired with pmap_pte_release(), as
 *    pmap_mincore() does;
 *  - the page-table entry is printed through uintmax_t/%jx so that its
 *    width no longer has to match "int";
 *  - address arithmetic is done in vm_offset_t rather than by shifting
 *    a signed int.
 */
int
pmap_pid_dump(int pid)
{
	struct proc *p;
	pmap_t pmap;
	pd_entry_t *pde;
	pt_entry_t *ptep, pte;
	vm_offset_t base, va;
	vm_page_t m;
	int i, j, index, npte, valid;

	npte = 0;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		if (p->p_pid != pid || p->p_vmspace == NULL)
			continue;

		index = 0;
		pmap = vmspace_pmap(p->p_vmspace);
		for (i = 0; i < NPDEPTD; i++) {
			pde = &pmap->pm_pdir[i];
			if (!pmap_pde_v(pde))
				continue;

			base = (vm_offset_t)i << PDRSHIFT;
			for (j = 0; j < NPTEPG; j++) {
				va = base + ((vm_offset_t)j << PAGE_SHIFT);
				if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
					/* Terminate a half-filled output line. */
					if (index != 0)
						printf("\n");
					sx_sunlock(&allproc_lock);
					return (npte);
				}

				/*
				 * Snapshot the entry, then release the
				 * lookup before doing anything else with it.
				 */
				ptep = pmap_pte(pmap, va);
				valid = (ptep != NULL && pmap_pte_v(ptep));
				pte = valid ? PT_GET(ptep) : 0;
				pmap_pte_release(ptep);
				if (!valid)
					continue;

				m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
				printf("va: 0x%x, pt: 0x%jx, h: %d, w: %d, f: 0x%x",
				    va, (uintmax_t)pte, m->hold_count,
				    m->wire_count, m->flags);
				npte++;
				index++;
				if (index >= 2) {
					index = 0;
					printf("\n");
				} else {
					printf(" ");
				}
			}
		}
	}
	sx_sunlock(&allproc_lock);
	return (npte);
}
#endif
|
|
|
|
|
|
|
|
|
|
#if defined(DEBUG)
|
|
|
|
|
|
|
|
|
|
static void pads(pmap_t pm);
|
|
|
|
|
void pmap_pvdump(vm_paddr_t pa);
|
|
|
|
|
|
|
|
|
|
/* print address space of pmap*/
|
|
|
|
|
static void
|
|
|
|
|
pads(pmap_t pm)
|
|
|
|
|
{
|
|
|
|
|
int i, j;
|
|
|
|
|
vm_paddr_t va;
|
|
|
|
|
pt_entry_t *ptep;
|
|
|
|
|
|
|
|
|
|
if (pm == kernel_pmap)
|
|
|
|
|
return;
|
|
|
|
|
for (i = 0; i < NPDEPTD; i++)
|
|
|
|
|
if (pm->pm_pdir[i])
|
|
|
|
|
for (j = 0; j < NPTEPG; j++) {
|
|
|
|
|
va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
|
|
|
|
|
if (pm == kernel_pmap && va < KERNBASE)
|
|
|
|
|
continue;
|
|
|
|
|
if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
|
|
|
|
|
continue;
|
|
|
|
|
ptep = pmap_pte(pm, va);
|
|
|
|
|
if (pmap_pte_v(ptep))
|
|
|
|
|
printf("%x:%x ", va, *ptep);
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
pmap_pvdump(vm_paddr_t pa)
|
|
|
|
|
{
|
|
|
|
|
pv_entry_t pv;
|
|
|
|
|
pmap_t pmap;
|
|
|
|
|
vm_page_t m;
|
|
|
|
|
|
|
|
|
|
printf("pa %x", pa);
|
|
|
|
|
m = PHYS_TO_VM_PAGE(pa);
|
|
|
|
|
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
|
|
|
|
|
pmap = PV_PMAP(pv);
|
|
|
|
|
printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va);
|
|
|
|
|
pads(pmap);
|
|
|
|
|
}
|
|
|
|
|
printf(" ");
|
|
|
|
|
}
|
|
|
|
|
#endif
|