Redo the page table page allocation on MIPS, as suggested by alc@.

The UMA zone-based allocation is replaced by a scheme that creates
a new free page list for the KSEG0 region, and a new function
in sys/vm that allocates pages from a specific free page list.

This also fixes a race condition introduced by the UMA-based page table
page allocation code: dropping the page queue and pmap locks before the
call to uma_zfree(), and re-acquiring them afterwards, introduces a race
condition (noted by alc@).
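
For reference, the removed path did roughly the following (a condensed
sketch of the code removed in the pmap.c hunk below, not the verbatim
lines):

	PMAP_UNLOCK(pmap);
	vm_page_unlock_queues();
	pmap_release_pte_page(m);	/* uma_zfree() runs outside both locks */
	vm_page_lock_queues();
	PMAP_LOCK(pmap);

In the unlocked window another thread can modify the pmap or the page
queues while the page table page is being freed.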

The changes are:
- Revert the earlier changes in MIPS pmap.c that added a UMA zone for
page table pages.
- Add a new freelist VM_FREELIST_HIGHMEM to MIPS vmparam.h for memory that
is not directly mapped (in the 32-bit kernel). Normal page allocations will
first try the HIGHMEM freelist and then the default (direct-mapped) freelist.
- Add a new function 'vm_page_t vm_page_alloc_freelist(int flind, int
order, int req)' to vm/vm_page.c to allocate a page from a specified
freelist. The MIPS page table pages will be allocated using this function
from the freelist containing direct-mapped pages (see the sketch after
this list).
- Move the page initialization code from vm_phys_alloc_contig() to a
new function vm_page_alloc_init(), and use this function to initialize
pages in vm_page_alloc_freelist() too.
- Split the function vm_phys_alloc_pages(int pool, int order) to create
vm_phys_alloc_freelist_pages(int flind, int pool, int order), and use
this function from both vm_page_alloc_freelist() and vm_phys_alloc_pages().
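
For illustration, the new MIPS allocation path reduces to roughly the
following (a sketch condensed from the pmap.c hunk below; locking and
error handling omitted):

	vm_page_t m;
	vm_offset_t va;

	/* Take an order-0 page from the direct-mapped (KSEG0) freelist. */
	while ((m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, 0,
	    VM_ALLOC_NORMAL)) == NULL)
		vm_contig_grow_cache(3, 0, MIPS_KSEG0_LARGEST_PHYS);
	va = MIPS_PHYS_TO_KSEG0(VM_PAGE_TO_PHYS(m));	/* usable KSEG0 VA */

Because the page comes from the direct-mapped freelist, the pmap and
page queue locks no longer have to be dropped around the allocator,
which removes the race described above.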

Reviewed by:	alc
Jayachandran C. 2010-07-21 09:27:00 +00:00
parent 3fd7b10a01
commit 49ca10d40c
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=210327
6 changed files with 202 additions and 178 deletions

sys/mips/include/vmparam.h

@@ -125,7 +125,6 @@
#define VM_NRESERVLEVEL 0
#endif
/* virtual sizes (bytes) for various kernel submaps */
#ifndef VM_KMEM_SIZE
#define VM_KMEM_SIZE (12 * 1024 * 1024)
@@ -174,13 +173,24 @@
#define VM_FREEPOOL_DIRECT 1
/*
* we support 1 free list:
* we support 2 free lists:
*
* - DEFAULT for all systems
* - DEFAULT for direct mapped (KSEG0) pages.
* Note: This usage of DEFAULT may be misleading because we use
* DEFAULT for allocating direct mapped pages. The normal page
* allocations use HIGHMEM if available, and then DEFAULT.
* - HIGHMEM for other pages
*/
#ifdef __mips_n64
#define VM_NFREELIST 1
#define VM_FREELIST_DEFAULT 0
#else
#define VM_NFREELIST 2
#define VM_FREELIST_DEFAULT 1
#define VM_FREELIST_HIGHMEM 0
#define VM_FREELIST_DIRECT VM_FREELIST_DEFAULT
#define VM_HIGHMEM_ADDRESS ((vm_paddr_t)0x20000000)
#endif
/*
* The largest allocation size is 1MB.

sys/mips/mips/pmap.c

@@ -187,8 +187,8 @@ static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t);
static int init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot);
static vm_page_t pmap_alloc_pte_page(pmap_t, unsigned int, int, vm_offset_t *);
static void pmap_release_pte_page(vm_page_t);
static vm_page_t pmap_alloc_pte_page(unsigned int index, int req);
static void pmap_grow_pte_page_cache(void);
#ifdef SMP
static void pmap_invalidate_page_action(void *arg);
@@ -196,10 +196,6 @@ static void pmap_invalidate_all_action(void *arg);
static void pmap_update_page_action(void *arg);
#endif
static void pmap_ptpgzone_dtor(void *mem, int size, void *arg);
static void *pmap_ptpgzone_allocf(uma_zone_t, int, u_int8_t *, int);
static uma_zone_t ptpgzone;
#if !defined(__mips_n64)
struct local_sysmaps {
vm_offset_t base;
@@ -539,10 +535,6 @@ pmap_init(void)
pv_entry_max = PMAP_SHPGPERPROC * maxproc + cnt.v_page_count;
pv_entry_high_water = 9 * (pv_entry_max / 10);
uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
ptpgzone = uma_zcreate("PT ENTRY", PAGE_SIZE, NULL, pmap_ptpgzone_dtor,
NULL, NULL, PAGE_SIZE - 1, UMA_ZONE_NOFREE | UMA_ZONE_ZINIT);
uma_zone_set_allocf(ptpgzone, pmap_ptpgzone_allocf);
}
/***************************************************
@@ -882,12 +874,8 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
/*
* If the page is finally unwired, simply free it.
*/
vm_page_free_zero(m);
atomic_subtract_int(&cnt.v_wire_count, 1);
PMAP_UNLOCK(pmap);
vm_page_unlock_queues();
pmap_release_pte_page(m);
vm_page_lock_queues();
PMAP_LOCK(pmap);
return (1);
}
@@ -947,95 +935,30 @@ pmap_pinit0(pmap_t pmap)
}
static void
pmap_ptpgzone_dtor(void *mem, int size, void *arg)
pmap_grow_pte_page_cache()
{
#ifdef INVARIANTS
static char zeropage[PAGE_SIZE];
KASSERT(size == PAGE_SIZE,
("pmap_ptpgzone_dtor: invalid size %d", size));
KASSERT(bcmp(mem, zeropage, PAGE_SIZE) == 0,
("pmap_ptpgzone_dtor: freeing a non-zeroed page"));
#endif
vm_contig_grow_cache(3, 0, MIPS_KSEG0_LARGEST_PHYS);
}
static void *
pmap_ptpgzone_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
vm_page_t m;
vm_paddr_t paddr;
int tries;
KASSERT(bytes == PAGE_SIZE,
("pmap_ptpgzone_allocf: invalid allocation size %d", bytes));
*flags = UMA_SLAB_PRIV;
tries = 0;
retry:
m = vm_phys_alloc_contig(1, 0, MIPS_KSEG0_LARGEST_PHYS,
PAGE_SIZE, PAGE_SIZE);
if (m == NULL) {
if (tries < ((wait & M_NOWAIT) != 0 ? 1 : 3)) {
vm_contig_grow_cache(tries, 0, MIPS_KSEG0_LARGEST_PHYS);
tries++;
goto retry;
} else
return (NULL);
}
paddr = VM_PAGE_TO_PHYS(m);
return ((void *)MIPS_PHYS_TO_KSEG0(paddr));
}
static vm_page_t
pmap_alloc_pte_page(pmap_t pmap, unsigned int index, int wait, vm_offset_t *vap)
pmap_alloc_pte_page(unsigned int index, int req)
{
vm_paddr_t paddr;
void *va;
vm_page_t m;
int locked;
locked = mtx_owned(&pmap->pm_mtx);
if (locked) {
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
PMAP_UNLOCK(pmap);
vm_page_unlock_queues();
}
va = uma_zalloc(ptpgzone, wait);
if (locked) {
vm_page_lock_queues();
PMAP_LOCK(pmap);
}
if (va == NULL)
m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, 0, req);
if (m == NULL)
return (NULL);
paddr = MIPS_KSEG0_TO_PHYS(va);
m = PHYS_TO_VM_PAGE(paddr);
if (!locked)
vm_page_lock_queues();
if ((m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
m->pindex = index;
m->valid = VM_PAGE_BITS_ALL;
m->wire_count = 1;
if (!locked)
vm_page_unlock_queues();
atomic_add_int(&cnt.v_wire_count, 1);
*vap = (vm_offset_t)va;
m->wire_count = 1;
return (m);
}
static void
pmap_release_pte_page(vm_page_t m)
{
void *va;
vm_paddr_t paddr;
paddr = VM_PAGE_TO_PHYS(m);
va = (void *)MIPS_PHYS_TO_KSEG0(paddr);
uma_zfree(ptpgzone, va);
}
/*
* Initialize a preallocated and zeroed pmap structure,
* such as one in a vmspace structure.
@@ -1052,10 +975,10 @@ pmap_pinit(pmap_t pmap)
/*
* allocate the page directory page
*/
ptdpg = pmap_alloc_pte_page(pmap, NUSERPGTBLS, M_WAITOK, &ptdva);
if (ptdpg == NULL)
return (0);
while ((ptdpg = pmap_alloc_pte_page(NUSERPGTBLS, VM_ALLOC_NORMAL)) == NULL)
pmap_grow_pte_page_cache();
ptdva = MIPS_PHYS_TO_KSEG0(VM_PAGE_TO_PHYS(ptdpg));
pmap->pm_segtab = (pd_entry_t *)ptdva;
pmap->pm_active = 0;
pmap->pm_ptphint = NULL;
@@ -1086,15 +1009,28 @@ _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
/*
* Find or fabricate a new pagetable page
*/
m = pmap_alloc_pte_page(pmap, ptepindex, flags, &pteva);
if (m == NULL)
if ((m = pmap_alloc_pte_page(ptepindex, VM_ALLOC_NORMAL)) == NULL) {
if (flags & M_WAITOK) {
PMAP_UNLOCK(pmap);
vm_page_unlock_queues();
pmap_grow_pte_page_cache();
vm_page_lock_queues();
PMAP_LOCK(pmap);
}
/*
* Indicate the need to retry. While waiting, the page
* table page may have been allocated.
*/
return (NULL);
}
/*
* Map the pagetable page into the process address space, if it
* isn't already there.
*/
pteva = MIPS_PHYS_TO_KSEG0(VM_PAGE_TO_PHYS(m));
pmap->pm_stats.resident_count++;
pmap->pm_segtab[ptepindex] = (pd_entry_t)pteva;
@@ -1190,7 +1126,7 @@ pmap_release(pmap_t pmap)
ptdpg->wire_count--;
atomic_subtract_int(&cnt.v_wire_count, 1);
pmap_release_pte_page(ptdpg);
vm_page_free_zero(ptdpg);
PMAP_LOCK_DESTROY(pmap);
}
@@ -1200,7 +1136,6 @@ pmap_release(pmap_t pmap)
void
pmap_growkernel(vm_offset_t addr)
{
vm_offset_t pageva;
vm_page_t nkpg;
pt_entry_t *pte;
int i;
@@ -1235,14 +1170,13 @@ pmap_growkernel(vm_offset_t addr)
/*
* This index is bogus, but out of the way
*/
nkpg = pmap_alloc_pte_page(kernel_pmap, nkpt, M_NOWAIT, &pageva);
nkpg = pmap_alloc_pte_page(nkpt, VM_ALLOC_INTERRUPT);
if (!nkpg)
panic("pmap_growkernel: no memory to grow kernel");
nkpt++;
pte = (pt_entry_t *)pageva;
segtab_pde(kernel_segmap, kernel_vm_end) = pte;
pte = (pt_entry_t *)MIPS_PHYS_TO_KSEG0(VM_PAGE_TO_PHYS(nkpg));
segtab_pde(kernel_segmap, kernel_vm_end) = (pd_entry_t)pte;
/*
* The R[4-7]?00 stores only one copy of the Global bit in

sys/vm/vm_page.c

@@ -1354,6 +1354,95 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
return (m);
}
/*
* Initialize a page that has been freshly dequeued from a freelist.
* The caller has to drop the vnode returned, if it is not NULL.
*
* To be called with vm_page_queue_free_mtx held.
*/
struct vnode *
vm_page_alloc_init(vm_page_t m)
{
struct vnode *drop;
vm_object_t m_object;
KASSERT(m->queue == PQ_NONE,
("vm_page_alloc_init: page %p has unexpected queue %d",
m, m->queue));
KASSERT(m->wire_count == 0,
("vm_page_alloc_init: page %p is wired", m));
KASSERT(m->hold_count == 0,
("vm_page_alloc_init: page %p is held", m));
KASSERT(m->busy == 0,
("vm_page_alloc_init: page %p is busy", m));
KASSERT(m->dirty == 0,
("vm_page_alloc_init: page %p is dirty", m));
KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
("vm_page_alloc_init: page %p has unexpected memattr %d",
m, pmap_page_get_memattr(m)));
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
drop = NULL;
if ((m->flags & PG_CACHED) != 0) {
m->valid = 0;
m_object = m->object;
vm_page_cache_remove(m);
if (m_object->type == OBJT_VNODE &&
m_object->cache == NULL)
drop = m_object->handle;
} else {
KASSERT(VM_PAGE_IS_FREE(m),
("vm_page_alloc_init: page %p is not free", m));
KASSERT(m->valid == 0,
("vm_page_alloc_init: free page %p is valid", m));
cnt.v_free_count--;
}
if (m->flags & PG_ZERO)
vm_page_zero_count--;
/* Don't clear the PG_ZERO flag; we'll need it later. */
m->flags = PG_UNMANAGED | (m->flags & PG_ZERO);
m->oflags = 0;
/* Unmanaged pages don't use "act_count". */
return (drop);
}
/*
* vm_page_alloc_freelist:
*
* Allocate a page from the specified freelist with specified order.
* Only the ALLOC_CLASS values in req are honored, other request flags
* are ignored.
*/
vm_page_t
vm_page_alloc_freelist(int flind, int order, int req)
{
struct vnode *drop;
vm_page_t m;
int page_req;
m = NULL;
page_req = req & VM_ALLOC_CLASS_MASK;
mtx_lock(&vm_page_queue_free_mtx);
/*
* Do not allocate reserved pages unless the req has asked for it.
*/
if (cnt.v_free_count + cnt.v_cache_count > cnt.v_free_reserved ||
(page_req == VM_ALLOC_SYSTEM &&
cnt.v_free_count + cnt.v_cache_count > cnt.v_interrupt_free_min) ||
(page_req == VM_ALLOC_INTERRUPT &&
cnt.v_free_count + cnt.v_cache_count > 0)) {
m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, order);
}
if (m == NULL) {
mtx_unlock(&vm_page_queue_free_mtx);
return (NULL);
}
drop = vm_page_alloc_init(m);
mtx_unlock(&vm_page_queue_free_mtx);
if (drop)
vdrop(drop);
return (m);
}
/*
* vm_wait: (also see VM_WAIT macro)
*

sys/vm/vm_page.h

@@ -262,6 +262,7 @@ extern struct vpglocks pa_lock[];
*
*/
struct vnode;
extern int vm_page_zero_count;
extern vm_page_t vm_page_array; /* First resident page in table */
@@ -339,6 +340,8 @@ void vm_pageq_remove(vm_page_t m);
void vm_page_activate (vm_page_t);
vm_page_t vm_page_alloc (vm_object_t, vm_pindex_t, int);
vm_page_t vm_page_alloc_freelist(int, int, int);
struct vnode *vm_page_alloc_init(vm_page_t);
vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
void vm_page_cache(vm_page_t);
void vm_page_cache_free(vm_object_t, vm_pindex_t, vm_pindex_t);

sys/vm/vm_phys.c

@@ -301,49 +301,67 @@ vm_phys_add_page(vm_paddr_t pa)
vm_page_t
vm_phys_alloc_pages(int pool, int order)
{
vm_page_t m;
int flind;
for (flind = 0; flind < vm_nfreelists; flind++) {
m = vm_phys_alloc_freelist_pages(flind, pool, order);
if (m != NULL)
return (m);
}
return (NULL);
}
/*
* Find and dequeue a free page on the given free list, with the
* specified pool and order
*/
vm_page_t
vm_phys_alloc_freelist_pages(int flind, int pool, int order)
{
struct vm_freelist *fl;
struct vm_freelist *alt;
int flind, oind, pind;
int oind, pind;
vm_page_t m;
KASSERT(flind < VM_NFREELIST,
("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
KASSERT(pool < VM_NFREEPOOL,
("vm_phys_alloc_pages: pool %d is out of range", pool));
("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
KASSERT(order < VM_NFREEORDER,
("vm_phys_alloc_pages: order %d is out of range", order));
("vm_phys_alloc_freelist_pages: order %d is out of range", order));
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
for (flind = 0; flind < vm_nfreelists; flind++) {
fl = vm_phys_free_queues[flind][pool];
for (oind = order; oind < VM_NFREEORDER; oind++) {
m = TAILQ_FIRST(&fl[oind].pl);
fl = vm_phys_free_queues[flind][pool];
for (oind = order; oind < VM_NFREEORDER; oind++) {
m = TAILQ_FIRST(&fl[oind].pl);
if (m != NULL) {
TAILQ_REMOVE(&fl[oind].pl, m, pageq);
fl[oind].lcnt--;
m->order = VM_NFREEORDER;
vm_phys_split_pages(m, oind, fl, order);
return (m);
}
}
/*
* The given pool was empty. Find the largest
* contiguous, power-of-two-sized set of pages in any
* pool. Transfer these pages to the given pool, and
* use them to satisfy the allocation.
*/
for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
for (pind = 0; pind < VM_NFREEPOOL; pind++) {
alt = vm_phys_free_queues[flind][pind];
m = TAILQ_FIRST(&alt[oind].pl);
if (m != NULL) {
TAILQ_REMOVE(&fl[oind].pl, m, pageq);
fl[oind].lcnt--;
TAILQ_REMOVE(&alt[oind].pl, m, pageq);
alt[oind].lcnt--;
m->order = VM_NFREEORDER;
vm_phys_set_pool(pool, m, oind);
vm_phys_split_pages(m, oind, fl, order);
return (m);
}
}
/*
* The given pool was empty. Find the largest
* contiguous, power-of-two-sized set of pages in any
* pool. Transfer these pages to the given pool, and
* use them to satisfy the allocation.
*/
for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
for (pind = 0; pind < VM_NFREEPOOL; pind++) {
alt = vm_phys_free_queues[flind][pind];
m = TAILQ_FIRST(&alt[oind].pl);
if (m != NULL) {
TAILQ_REMOVE(&alt[oind].pl, m, pageq);
alt[oind].lcnt--;
m->order = VM_NFREEORDER;
vm_phys_set_pool(pool, m, oind);
vm_phys_split_pages(m, oind, fl, order);
return (m);
}
}
}
}
return (NULL);
}
@@ -592,7 +610,7 @@ vm_phys_alloc_contig(unsigned long npages, vm_paddr_t low, vm_paddr_t high,
{
struct vm_freelist *fl;
struct vm_phys_seg *seg;
vm_object_t m_object;
struct vnode *vp;
vm_paddr_t pa, pa_last, size;
vm_page_t deferred_vdrop_list, m, m_ret;
int flind, i, oind, order, pind;
@@ -687,50 +705,19 @@ vm_phys_alloc_contig(unsigned long npages, vm_paddr_t low, vm_paddr_t high,
vm_phys_split_pages(m_ret, oind, fl, order);
for (i = 0; i < npages; i++) {
m = &m_ret[i];
KASSERT(m->queue == PQ_NONE,
("vm_phys_alloc_contig: page %p has unexpected queue %d",
m, m->queue));
KASSERT(m->wire_count == 0,
("vm_phys_alloc_contig: page %p is wired", m));
KASSERT(m->hold_count == 0,
("vm_phys_alloc_contig: page %p is held", m));
KASSERT(m->busy == 0,
("vm_phys_alloc_contig: page %p is busy", m));
KASSERT(m->dirty == 0,
("vm_phys_alloc_contig: page %p is dirty", m));
KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
("vm_phys_alloc_contig: page %p has unexpected memattr %d",
m, pmap_page_get_memattr(m)));
if ((m->flags & PG_CACHED) != 0) {
m->valid = 0;
m_object = m->object;
vm_page_cache_remove(m);
if (m_object->type == OBJT_VNODE &&
m_object->cache == NULL) {
/*
* Enqueue the vnode for deferred vdrop().
*
* Unmanaged pages don't use "pageq", so it
* can be safely abused to construct a short-
* lived queue of vnodes.
*/
m->pageq.tqe_prev = m_object->handle;
m->pageq.tqe_next = deferred_vdrop_list;
deferred_vdrop_list = m;
}
} else {
KASSERT(VM_PAGE_IS_FREE(m),
("vm_phys_alloc_contig: page %p is not free", m));
KASSERT(m->valid == 0,
("vm_phys_alloc_contig: free page %p is valid", m));
cnt.v_free_count--;
vp = vm_page_alloc_init(m);
if (vp != NULL) {
/*
* Enqueue the vnode for deferred vdrop().
*
* Unmanaged pages don't use "pageq", so it
* can be safely abused to construct a short-
* lived queue of vnodes.
*/
m->pageq.tqe_prev = (void *)vp;
m->pageq.tqe_next = deferred_vdrop_list;
deferred_vdrop_list = m;
}
if (m->flags & PG_ZERO)
vm_page_zero_count--;
/* Don't clear the PG_ZERO flag; we'll need it later. */
m->flags = PG_UNMANAGED | (m->flags & PG_ZERO);
m->oflags = 0;
/* Unmanaged pages don't use "act_count". */
}
for (; i < roundup2(npages, 1 << imin(oind, order)); i++) {
m = &m_ret[i];

sys/vm/vm_phys.h

@@ -44,6 +44,7 @@ void vm_phys_add_page(vm_paddr_t pa);
vm_page_t vm_phys_alloc_contig(unsigned long npages,
vm_paddr_t low, vm_paddr_t high,
unsigned long alignment, unsigned long boundary);
vm_page_t vm_phys_alloc_freelist_pages(int flind, int pool, int order);
vm_page_t vm_phys_alloc_pages(int pool, int order);
vm_paddr_t vm_phys_bootstrap_alloc(vm_size_t size, unsigned long alignment);
void vm_phys_free_pages(vm_page_t m, int order);