The physical memory allocator supports the use of distinct free lists for
managing pages from different address ranges.  Generally speaking, this
feature is used to increase the likelihood that physical pages are
available to meet special DMA requirements or to be accessed through a
limited-coverage direct mapping (e.g., on MIPS).  However, prior to this
change, the configuration of the free lists was static, i.e., it was
determined at compile time.  Consequently, free lists could be created
for address ranges that held no actual pages, for example, on 32-bit
MIPS-based systems with 512 MB or less of physical memory.  This change
makes the creation of the free lists dynamic, i.e., based on the
physical memory available at boot time.
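
As an aside, the splitting rule that the new vm_phys_add_seg() implements
can be replayed with the standalone sketch below (illustration only, not
code from this commit; the boundary values are the amd64 ones from the
diff that follows): every free list boundary that falls strictly inside a
physical memory segment cuts that segment, so each resulting piece lies
entirely within one address range.

    #include <stdio.h>

    typedef unsigned long long vm_paddr_t;

    static const vm_paddr_t boundaries[] = {
            16777216ULL,            /* 16MB: ISA DMA limit */
            1ULL << 32              /* 4GB: DMA32 limit */
    };

    static void
    split_seg(vm_paddr_t start, vm_paddr_t end)
    {
            size_t i;

            /* Cut the segment at each boundary it spans. */
            for (i = 0; i < sizeof(boundaries) / sizeof(boundaries[0]); i++)
                    if (start < boundaries[i] && end > boundaries[i]) {
                            printf("segment [%#llx, %#llx)\n", start,
                                boundaries[i]);
                            start = boundaries[i];
                    }
            printf("segment [%#llx, %#llx)\n", start, end);
    }

    int
    main(void)
    {
            split_seg(0x0, 1ULL << 33);     /* spans both boundaries */
            return (0);
    }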

On 64-bit x86-based systems with 64 GB or more of physical memory, create
free lists for managing pages with physical addresses below 4 GB.  This
change addresses reported problems with initializing devices that
require the allocation of physical pages below 4 GB on some systems with
128 GB or more of physical memory.
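
For background, a driver that needs such pages typically bounds the
allocation explicitly; a minimal sketch (hypothetical code, not part of
this commit) using contigmalloc(9) with an upper bound of 4 GB:

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/malloc.h>

    /*
     * Request physically contiguous memory entirely below 4 GB; on
     * large-memory machines this is the kind of request that the new
     * below-4G free lists are meant to satisfy quickly.
     */
    static void *
    alloc_below_4g(unsigned long size)
    {

            return (contigmalloc(size, M_DEVBUF, M_WAITOK, 0,
                (vm_paddr_t)1 << 32, PAGE_SIZE, 0));
    }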

PR:		185727
Differential Revision:	https://reviews.freebsd.org/D1274
Reviewed by:	jhb, kib
MFC after:	3 weeks
Sponsored by:	EMC / Isilon Storage Division
Author:	Alan Cox
Date:	2014-12-31 00:54:38 +00:00
Commit:	d866a563d4
Parent:	cd86d3634b
Notes:	svn2git 2020-12-20 02:59:44 +00:00
	svn path=/head/; revision=276439

4 changed files with 184 additions and 65 deletions

diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h

@@ -101,14 +101,22 @@
 #define VM_FREEPOOL_DIRECT 1
 
 /*
- * Create two free page lists: VM_FREELIST_DEFAULT is for physical
- * pages that are above the largest physical address that is
- * accessible by ISA DMA and VM_FREELIST_ISADMA is for physical pages
- * that are below that address.
+ * Create up to three free page lists: VM_FREELIST_DMA32 is for physical pages
+ * that have physical addresses below 4G but are not accessible by ISA DMA,
+ * and VM_FREELIST_ISADMA is for physical pages that are accessible by ISA
+ * DMA.
  */
-#define VM_NFREELIST 2
+#define VM_NFREELIST 3
 #define VM_FREELIST_DEFAULT 0
-#define VM_FREELIST_ISADMA 1
+#define VM_FREELIST_DMA32 1
+#define VM_FREELIST_ISADMA 2
+
+/*
+ * Create the DMA32 free list only if the number of physical pages above
+ * physical address 4G is at least 16M, which amounts to 64GB of physical
+ * memory.
+ */
+#define VM_DMA32_NPAGES_THRESHOLD 16777216
 
 /*
  * An allocation size of 16MB is supported in order to optimize the
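
For reference, the threshold arithmetic behind VM_DMA32_NPAGES_THRESHOLD,
assuming the 4 KB base page size: 16,777,216 pages x 4,096 bytes/page =
2^24 x 2^12 bytes = 2^36 bytes = 64 GB of memory above the 4 GB boundary.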

diff --git a/sys/mips/include/vmparam.h b/sys/mips/include/vmparam.h

@@ -160,13 +160,11 @@
 #define VM_FREEPOOL_DIRECT 1
 
 /*
- * we support 2 free lists:
- *
- * - DEFAULT for direct mapped (KSEG0) pages.
- *   Note: This usage of DEFAULT may be misleading because we use
- *   DEFAULT for allocating direct mapped pages. The normal page
- *   allocations use HIGHMEM if available, and then DEFAULT.
- * - HIGHMEM for other pages
+ * Create up to two free lists on !__mips_n64: VM_FREELIST_DEFAULT is for
+ * physical pages that are above the largest physical address that is
+ * accessible through the direct map (KSEG0) and VM_FREELIST_LOWMEM is for
+ * physical pages that are below that address.  VM_LOWMEM_BOUNDARY is the
+ * physical address for the end of the direct map (KSEG0).
  */
 #ifdef __mips_n64
 #define VM_NFREELIST 1
@@ -174,10 +172,10 @@
 #define VM_FREELIST_DIRECT VM_FREELIST_DEFAULT
 #else
 #define VM_NFREELIST 2
-#define VM_FREELIST_DEFAULT 1
-#define VM_FREELIST_HIGHMEM 0
-#define VM_FREELIST_DIRECT VM_FREELIST_DEFAULT
-#define VM_HIGHMEM_ADDRESS ((vm_paddr_t)0x20000000)
+#define VM_FREELIST_DEFAULT 0
+#define VM_FREELIST_LOWMEM 1
+#define VM_FREELIST_DIRECT VM_FREELIST_LOWMEM
+#define VM_LOWMEM_BOUNDARY ((vm_paddr_t)0x20000000)
 #endif
 
 /*
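
A short caller sketch (hypothetical, not part of this commit) of what the
renaming preserves: on !__mips_n64, VM_FREELIST_DIRECT still names the
list holding direct-mapped (KSEG0) pages, now VM_FREELIST_LOWMEM, so a
page that must be direct mapped is requested the same way as before:

    vm_page_t m;

    /* With the free page queues locked; order 0 requests a single page. */
    m = vm_phys_alloc_freelist_pages(VM_FREELIST_DIRECT, VM_FREEPOOL_DIRECT, 0);
    /* On success, m's physical address lies below VM_LOWMEM_BOUNDARY. */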

diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c

@@ -101,7 +101,32 @@ MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");
 static struct vm_freelist
     vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
 
-static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;
+static int vm_nfreelists;
+
+/*
+ * Provides the mapping from VM_FREELIST_* to free list indices (flind).
+ */
+static int vm_freelist_to_flind[VM_NFREELIST];
+
+CTASSERT(VM_FREELIST_DEFAULT == 0);
+
+#ifdef VM_FREELIST_ISADMA
+#define VM_ISADMA_BOUNDARY 16777216
+#endif
+#ifdef VM_FREELIST_DMA32
+#define VM_DMA32_BOUNDARY ((vm_paddr_t)1 << 32)
+#endif
+
+/*
+ * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
+ * the ordering of the free list boundaries.
+ */
+#if defined(VM_ISADMA_BOUNDARY) && defined(VM_LOWMEM_BOUNDARY)
+CTASSERT(VM_ISADMA_BOUNDARY < VM_LOWMEM_BOUNDARY);
+#endif
+#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
+CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
+#endif
 
 static int cnt_prezero;
 SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
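
Concretely, both assertions involve VM_LOWMEM_BOUNDARY, so they only take
effect on a platform that defines VM_FREELIST_LOWMEM (e.g., 32-bit MIPS,
where the boundary is 512 MB) together with an ISADMA or DMA32 list; on
amd64, which defines only the ISADMA (16 MB) and DMA32 (4 GB) boundaries,
neither assertion is compiled in.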
@@ -120,9 +145,8 @@ SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
 static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
     int order);
-static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
-    int domain);
-static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
+static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
+static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
 static int vm_phys_paddr_to_segind(vm_paddr_t pa);
 static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
     int order);
@@ -298,7 +322,7 @@ vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
 /*
  * Create a physical memory segment.
  */
 static void
-_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
+_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
 {
         struct vm_phys_seg *seg;
@@ -314,16 +338,15 @@ _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
         seg->start = start;
         seg->end = end;
         seg->domain = domain;
-        seg->free_queues = &vm_phys_free_queues[domain][flind];
 }
 
 static void
-vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
+vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
 {
         int i;
 
         if (mem_affinity == NULL) {
-                _vm_phys_create_seg(start, end, flind, 0);
+                _vm_phys_create_seg(start, end, 0);
                 return;
         }
@@ -336,11 +359,11 @@ vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
                         panic("No affinity info for start %jx",
                             (uintmax_t)start);
                 if (mem_affinity[i].end >= end) {
-                        _vm_phys_create_seg(start, end, flind,
+                        _vm_phys_create_seg(start, end,
                             mem_affinity[i].domain);
                         break;
                 }
-                _vm_phys_create_seg(start, mem_affinity[i].end, flind,
+                _vm_phys_create_seg(start, mem_affinity[i].end,
                     mem_affinity[i].domain);
                 start = mem_affinity[i].end;
         }
@@ -352,64 +375,149 @@ vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
 void
 vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
 {
+        vm_paddr_t paddr;
 
         KASSERT((start & PAGE_MASK) == 0,
             ("vm_phys_define_seg: start is not page aligned"));
         KASSERT((end & PAGE_MASK) == 0,
             ("vm_phys_define_seg: end is not page aligned"));
+
+        /*
+         * Split the physical memory segment if it spans two or more free
+         * list boundaries.
+         */
+        paddr = start;
 #ifdef VM_FREELIST_ISADMA
-        if (start < 16777216) {
-                if (end > 16777216) {
-                        vm_phys_create_seg(start, 16777216,
-                            VM_FREELIST_ISADMA);
-                        vm_phys_create_seg(16777216, end, VM_FREELIST_DEFAULT);
-                } else
-                        vm_phys_create_seg(start, end, VM_FREELIST_ISADMA);
-                if (VM_FREELIST_ISADMA >= vm_nfreelists)
-                        vm_nfreelists = VM_FREELIST_ISADMA + 1;
-        } else
+        if (paddr < VM_ISADMA_BOUNDARY && end > VM_ISADMA_BOUNDARY) {
+                vm_phys_create_seg(paddr, VM_ISADMA_BOUNDARY);
+                paddr = VM_ISADMA_BOUNDARY;
+        }
 #endif
-#ifdef VM_FREELIST_HIGHMEM
-        if (end > VM_HIGHMEM_ADDRESS) {
-                if (start < VM_HIGHMEM_ADDRESS) {
-                        vm_phys_create_seg(start, VM_HIGHMEM_ADDRESS,
-                            VM_FREELIST_DEFAULT);
-                        vm_phys_create_seg(VM_HIGHMEM_ADDRESS, end,
-                            VM_FREELIST_HIGHMEM);
-                } else
-                        vm_phys_create_seg(start, end, VM_FREELIST_HIGHMEM);
-                if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
-                        vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
-        } else
+#ifdef VM_FREELIST_LOWMEM
+        if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
+                vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
+                paddr = VM_LOWMEM_BOUNDARY;
+        }
 #endif
-        vm_phys_create_seg(start, end, VM_FREELIST_DEFAULT);
+#ifdef VM_FREELIST_DMA32
+        if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
+                vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
+                paddr = VM_DMA32_BOUNDARY;
+        }
+#endif
+        vm_phys_create_seg(paddr, end);
 }
 
 /*
  * Initialize the physical memory allocator.
+ *
+ * Requires that vm_page_array is initialized!
  */
 void
 vm_phys_init(void)
 {
         struct vm_freelist *fl;
         struct vm_phys_seg *seg;
-#ifdef VM_PHYSSEG_SPARSE
-        long pages;
-#endif
-        int dom, flind, oind, pind, segind;
+        u_long npages;
+        int dom, flind, freelist, oind, pind, segind;
 
+        /*
+         * Compute the number of free lists, and generate the mapping from the
+         * manifest constants VM_FREELIST_* to the free list indices.
+         *
+         * Initially, the entries of vm_freelist_to_flind[] are set to either
+         * 0 or 1 to indicate which free lists should be created.
+         */
+        npages = 0;
+        for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
+                seg = &vm_phys_segs[segind];
+#ifdef VM_FREELIST_ISADMA
+                if (seg->end <= VM_ISADMA_BOUNDARY)
+                        vm_freelist_to_flind[VM_FREELIST_ISADMA] = 1;
+                else
+#endif
+#ifdef VM_FREELIST_LOWMEM
+                if (seg->end <= VM_LOWMEM_BOUNDARY)
+                        vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
+                else
+#endif
+#ifdef VM_FREELIST_DMA32
+                if (
+#ifdef VM_DMA32_NPAGES_THRESHOLD
+                    /*
+                     * Create the DMA32 free list only if the amount of
+                     * physical memory above physical address 4G exceeds the
+                     * given threshold.
+                     */
+                    npages > VM_DMA32_NPAGES_THRESHOLD &&
+#endif
+                    seg->end <= VM_DMA32_BOUNDARY)
+                        vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
+                else
+#endif
+                {
+                        npages += atop(seg->end - seg->start);
+                        vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
+                }
+        }
+        /* Change each entry into a running total of the free lists. */
+        for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
+                vm_freelist_to_flind[freelist] +=
+                    vm_freelist_to_flind[freelist - 1];
+        }
+        vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
+        KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
+        /* Change each entry into a free list index. */
+        for (freelist = 0; freelist < VM_NFREELIST; freelist++)
+                vm_freelist_to_flind[freelist]--;
+
+        /*
+         * Initialize the first_page and free_queues fields of each physical
+         * memory segment.
+         */
 #ifdef VM_PHYSSEG_SPARSE
-        pages = 0;
+        npages = 0;
 #endif
         for (segind = 0; segind < vm_phys_nsegs; segind++) {
                 seg = &vm_phys_segs[segind];
 #ifdef VM_PHYSSEG_SPARSE
-                seg->first_page = &vm_page_array[pages];
-                pages += atop(seg->end - seg->start);
+                seg->first_page = &vm_page_array[npages];
+                npages += atop(seg->end - seg->start);
 #else
                 seg->first_page = PHYS_TO_VM_PAGE(seg->start);
 #endif
+#ifdef VM_FREELIST_ISADMA
+                if (seg->end <= VM_ISADMA_BOUNDARY) {
+                        flind = vm_freelist_to_flind[VM_FREELIST_ISADMA];
+                        KASSERT(flind >= 0,
+                            ("vm_phys_init: ISADMA flind < 0"));
+                } else
+#endif
+#ifdef VM_FREELIST_LOWMEM
+                if (seg->end <= VM_LOWMEM_BOUNDARY) {
+                        flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
+                        KASSERT(flind >= 0,
+                            ("vm_phys_init: LOWMEM flind < 0"));
+                } else
+#endif
+#ifdef VM_FREELIST_DMA32
+                if (seg->end <= VM_DMA32_BOUNDARY) {
+                        flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
+                        KASSERT(flind >= 0,
+                            ("vm_phys_init: DMA32 flind < 0"));
+                } else
+#endif
+                {
+                        flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
+                        KASSERT(flind >= 0,
+                            ("vm_phys_init: DEFAULT flind < 0"));
+                }
+                seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
         }
+
+        /*
+         * Initialize the free queues.
+         */
         for (dom = 0; dom < vm_ndomains; dom++) {
                 for (flind = 0; flind < vm_nfreelists; flind++) {
                         for (pind = 0; pind < VM_NFREEPOOL; pind++) {
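
The two-pass transform of vm_freelist_to_flind[] above is compact enough
to be easy to misread; the following standalone sketch (illustration
only, not kernel code) replays it for a hypothetical amd64 machine whose
segments mark all three lists for creation:

    #include <stdio.h>

    #define VM_NFREELIST        3
    #define VM_FREELIST_DEFAULT 0
    #define VM_FREELIST_DMA32   1
    #define VM_FREELIST_ISADMA  2

    int
    main(void)
    {
            /* Pass 1 marked every list that should be created with a 1. */
            int flind[VM_NFREELIST] = { 1, 1, 1 };
            int i;

            /* Running totals, then subtract 1 to get 0-based indices. */
            for (i = 1; i < VM_NFREELIST; i++)
                    flind[i] += flind[i - 1];
            for (i = 0; i < VM_NFREELIST; i++)
                    flind[i]--;

            /* Prints "DEFAULT=0 DMA32=1 ISADMA=2" here; an unmarked list
               would instead collapse onto the previous entry's index, so
               uncreated lists never waste a queue slot. */
            printf("DEFAULT=%d DMA32=%d ISADMA=%d\n",
                flind[VM_FREELIST_DEFAULT], flind[VM_FREELIST_DMA32],
                flind[VM_FREELIST_ISADMA]);
            return (0);
    }
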
@@ -419,6 +527,7 @@ vm_phys_init(void)
                         }
                 }
         }
+
         rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
 }
@@ -498,25 +607,29 @@ vm_phys_alloc_pages(int pool, int order)
 }
 
 /*
- * Find and dequeue a free page on the given free list, with the
- * specified pool and order
+ * Allocate a contiguous, power of two-sized set of physical pages from the
+ * specified free list.  The free list must be specified using one of the
+ * manifest constants VM_FREELIST_*.
  *
  * The free page queues must be locked.
  */
 vm_page_t
-vm_phys_alloc_freelist_pages(int flind, int pool, int order)
+vm_phys_alloc_freelist_pages(int freelist, int pool, int order)
 {
         vm_page_t m;
         int dom, domain;
 
-        KASSERT(flind < VM_NFREELIST,
-            ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
+        KASSERT(freelist < VM_NFREELIST,
+            ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
+            freelist));
         KASSERT(pool < VM_NFREEPOOL,
             ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
         KASSERT(order < VM_NFREEORDER,
             ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
         for (dom = 0; dom < vm_ndomains; dom++) {
                 domain = vm_rr_selectdomain();
-                m = vm_phys_alloc_domain_pages(domain, flind, pool, order);
+                m = vm_phys_alloc_domain_pages(domain,
+                    vm_freelist_to_flind[freelist], pool, order);
                 if (m != NULL)
                         return (m);
         }
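
With this change, a caller names the list by its manifest constant and the
translation through vm_freelist_to_flind[] happens internally; a minimal
hypothetical caller (not part of this commit; the lock name reflects the
free page queue mutex of this era) looks like:

    vm_page_t m;

    mtx_lock(&vm_page_queue_free_mtx);
    m = vm_phys_alloc_freelist_pages(VM_FREELIST_ISADMA, VM_FREEPOOL_DEFAULT, 0);
    mtx_unlock(&vm_page_queue_free_mtx);
    /* m is NULL if no ISA DMA-capable page (below 16MB) is free. */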

diff --git a/sys/vm/vm_phys.h b/sys/vm/vm_phys.h

@@ -72,7 +72,7 @@ void vm_phys_add_page(vm_paddr_t pa);
 void vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end);
 vm_page_t vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
     u_long alignment, vm_paddr_t boundary);
-vm_page_t vm_phys_alloc_freelist_pages(int flind, int pool, int order);
+vm_page_t vm_phys_alloc_freelist_pages(int freelist, int pool, int order);
 vm_page_t vm_phys_alloc_pages(int pool, int order);
 boolean_t vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high);
 int vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,