This is a cleanup patch to Peter's new OBJT_PHYS VM object type and sysv shared memory support for it. It implements a new PG_UNMANAGED flag that has slightly different characteristics from PG_FICTITIOUS. A new sysctl, kern.ipc.shm_use_phys, has been added to enable the use of physically-backed sysv shared memory rather than swap-backed. Physically backed shm segments are not tracked with PV entries, allowing programs which use a large shm segment as a rendezvous point to operate without eating an insane amount of KVM in the PV entry management. Read: Oracle. Peter's OBJT_PHYS object will also allow us to eventually implement page-table sharing and/or 4MB physical page support for such segments. We're halfway there.
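
As a quick illustration of the new knob, here is a minimal userland sketch (not part of the commit; the key, segment size, and error handling are arbitrary examples) that turns on kern.ipc.shm_use_phys via sysctlbyname(3) and then creates and attaches a SysV segment with shmget(2)/shmat(2). Setting the sysctl requires root and, per the shmget_allocate_segment() hunk below, only affects segments created after it is set.

/*
 * Hedged sketch: enable kern.ipc.shm_use_phys so that newly created
 * SysV shm segments are backed by an OBJT_PHYS object instead of swap.
 * Size and permissions are illustrative only.
 */
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/sysctl.h>

#include <err.h>
#include <stdio.h>

int
main(void)
{
	int one = 1;
	int shmid;
	void *addr;

	/* Requires root; affects segments created after this point. */
	if (sysctlbyname("kern.ipc.shm_use_phys", NULL, NULL, &one,
	    sizeof(one)) != 0)
		warn("could not set kern.ipc.shm_use_phys");

	/* A 4MB rendezvous segment; large SGA-style areas work the same way
	   but may require raising kern.ipc.shmmax first. */
	shmid = shmget(IPC_PRIVATE, 4 * 1024 * 1024, IPC_CREAT | 0600);
	if (shmid == -1)
		err(1, "shmget");

	addr = shmat(shmid, NULL, 0);
	if (addr == (void *)-1)
		err(1, "shmat");

	printf("attached segment %d at %p\n", shmid, addr);
	return (0);
}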
parent  b620c1f5d6
commit  8b03c8ed5e

Changed directories: sys/amd64/amd64, sys/i386/i386, sys/kern, sys/vm
@@ -2095,7 +2095,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 	 * raise IPL while manipulating pv_table since pmap_enter can be
 	 * called at interrupt time.
 	 */
-	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
+	if (pmap_initialized &&
+	    (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
 		pmap_insert_entry(pmap, va, mpte, m);
 		pa |= PG_MANAGED;
 	}
@@ -2223,7 +2224,8 @@ retry:
 	 * raise IPL while manipulating pv_table since pmap_enter can be
 	 * called at interrupt time.
 	 */
-	pmap_insert_entry(pmap, va, mpte, m);
+	if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
+		pmap_insert_entry(pmap, va, mpte, m);
 
 	/*
 	 * Increment counters
@@ -2235,7 +2237,10 @@ retry:
 	/*
 	 * Now validate mapping with RO protection
 	 */
-	*pte = pa | PG_V | PG_U | PG_MANAGED;
+	if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
+		*pte = pa | PG_V | PG_U;
+	else
+		*pte = pa | PG_V | PG_U | PG_MANAGED;
 
 	return mpte;
 }
@@ -2095,7 +2095,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 	 * raise IPL while manipulating pv_table since pmap_enter can be
 	 * called at interrupt time.
 	 */
-	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
+	if (pmap_initialized &&
+	    (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
 		pmap_insert_entry(pmap, va, mpte, m);
 		pa |= PG_MANAGED;
 	}
@@ -2223,7 +2224,8 @@ retry:
 	 * raise IPL while manipulating pv_table since pmap_enter can be
 	 * called at interrupt time.
 	 */
-	pmap_insert_entry(pmap, va, mpte, m);
+	if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
+		pmap_insert_entry(pmap, va, mpte, m);
 
 	/*
 	 * Increment counters
@@ -2235,7 +2237,10 @@ retry:
 	/*
 	 * Now validate mapping with RO protection
 	 */
-	*pte = pa | PG_V | PG_U | PG_MANAGED;
+	if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
+		*pte = pa | PG_V | PG_U;
+	else
+		*pte = pa | PG_V | PG_U | PG_MANAGED;
 
 	return mpte;
 }
@@ -126,12 +126,15 @@ struct shminfo shminfo = {
 	SHMALL
 };
 
+static int shm_use_phys;
+
 SYSCTL_DECL(_kern_ipc);
 SYSCTL_INT(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0, "");
 SYSCTL_INT(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0, "");
 SYSCTL_INT(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RD, &shminfo.shmmni, 0, "");
 SYSCTL_INT(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RW, &shminfo.shmseg, 0, "");
 SYSCTL_INT(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0, "");
+SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RW, &shm_use_phys, 0, "");
 
 static int
 shm_find_segment_by_key(key)
@@ -528,13 +531,13 @@ shmget_allocate_segment(p, uap, mode)
 	 * We make sure that we have allocated a pager before we need
 	 * to.
 	 */
-#ifdef SHM_PHYS_BACKED
-	shm_handle->shm_object =
-	    vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
-#else
-	shm_handle->shm_object =
-	    vm_pager_allocate(OBJT_SWAP, 0, size, VM_PROT_DEFAULT, 0);
-#endif
+	if (shm_use_phys) {
+		shm_handle->shm_object =
+		    vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
+	} else {
+		shm_handle->shm_object =
+		    vm_pager_allocate(OBJT_SWAP, 0, size, VM_PROT_DEFAULT, 0);
+	}
 	vm_object_clear_flag(shm_handle->shm_object, OBJ_ONEMAPPING);
 	vm_object_set_flag(shm_handle->shm_object, OBJ_NOSPLIT);
@@ -104,7 +104,9 @@ phys_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
 			object = vm_object_allocate(OBJT_PHYS,
 				OFF_TO_IDX(foff + size));
 			object->handle = handle;
+#if 0
 			TAILQ_INIT(&object->un_pager.physp.physp_pglist);
+#endif
 			TAILQ_INSERT_TAIL(&phys_pager_object_list, object,
 			    pager_object_list);
 		} else {
@@ -131,20 +133,6 @@ phys_pager_dealloc(object)
 	int s;
 
 	TAILQ_REMOVE(&phys_pager_object_list, object, pager_object_list);
-	/*
-	 * Free up our fake pages.
-	 */
-	s = splvm();
-	while ((m = TAILQ_FIRST(&object->un_pager.physp.physp_pglist)) != 0) {
-		TAILQ_REMOVE(&object->un_pager.physp.physp_pglist, m, pageq);
-		/* return the page back to normal */
-		m->flags &= ~PG_FICTITIOUS;
-		m->dirty = 0;
-		vm_page_unwire(m, 0);
-		vm_page_flag_clear(m, PG_ZERO);
-		vm_page_free(m);
-	}
-	splx(s);
 }
 
 static int
@@ -165,8 +153,7 @@ phys_pager_getpages(object, m, count, reqpage)
 			vm_page_zero_fill(m[i]);
 			vm_page_flag_set(m[i], PG_ZERO);
 			/* Switch off pv_entries */
-			vm_page_wire(m[i]);
-			vm_page_flag_set(m[i], PG_FICTITIOUS);
+			vm_page_unmanage(m[i]);
 			m[i]->valid = VM_PAGE_BITS_ALL;
 			m[i]->dirty = 0;
 			/* The requested page must remain busy, the others not. */
@@ -174,8 +161,6 @@ phys_pager_getpages(object, m, count, reqpage)
 			vm_page_flag_clear(m[i], PG_BUSY);
 			m[i]->busy = 0;
 		}
-		TAILQ_INSERT_TAIL(&object->un_pager.physp.physp_pglist, m[i],
-		    pageq);
 	}
 	splx(s);
@@ -423,7 +423,7 @@ readrest:
 				if (mt == NULL || (mt->valid != VM_PAGE_BITS_ALL))
 					break;
 				if (mt->busy ||
-				    (mt->flags & (PG_BUSY | PG_FICTITIOUS)) ||
+				    (mt->flags & (PG_BUSY | PG_FICTITIOUS | PG_UNMANAGED)) ||
 				    mt->hold_count ||
 				    mt->wire_count)
 					continue;
@@ -833,12 +833,14 @@ shadowlookup:
 
 		/*
 		 * If the page is busy or not in a normal active state,
-		 * we skip it.  Things can break if we mess with pages
-		 * in any of the below states.
+		 * we skip it.  If the page is not managed there are no
+		 * page queues to mess with.  Things can break if we mess
+		 * with pages in any of the below states.
 		 */
 		if (
 		    m->hold_count ||
 		    m->wire_count ||
+		    (m->flags & PG_UNMANAGED) ||
 		    m->valid != VM_PAGE_BITS_ALL
 		) {
 			continue;
@@ -1394,6 +1396,13 @@ vm_object_page_remove(object, start, end, clean_only)
 
 	all = ((end == 0) && (start == 0));
 
+	/*
+	 * Since physically-backed objects do not use managed pages, we can't
+	 * remove pages from the object (we must instead remove the page
+	 * references, and then destroy the object).
+	 */
+	KASSERT(object->type != OBJT_PHYS, ("attempt to remove pages from a physical object"));
+
 	vm_object_pip_add(object, 1);
 again:
 	size = end - start;
@@ -123,6 +123,7 @@ struct vm_object {
 			TAILQ_HEAD(, vm_page) devp_pglist;
 		} devp;
 
+#if 0
 		/*
 		 * Physmem pager
 		 *
@@ -131,6 +132,7 @@ struct vm_object {
 		struct {
 			TAILQ_HEAD(, vm_page) physp_pglist;
 		} physp;
+#endif
 
 		/*
 		 * Swap pager
@@ -688,7 +688,7 @@ vm_page_select_cache(object, pindex)
 		    (pindex + object->pg_color) & PQ_L2_MASK,
 		    FALSE
 		);
-		if (m && ((m->flags & PG_BUSY) || m->busy ||
+		if (m && ((m->flags & (PG_BUSY|PG_UNMANAGED)) || m->busy ||
 		    m->hold_count || m->wire_count)) {
 			vm_page_deactivate(m);
 			continue;
@@ -997,7 +997,7 @@ vm_page_activate(m)
 
 	vm_page_unqueue(m);
 
-	if (m->wire_count == 0) {
+	if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
 		m->queue = PQ_ACTIVE;
 		vm_page_queues[PQ_ACTIVE].lcnt++;
 		TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
@@ -1128,9 +1128,17 @@ vm_page_free_toq(vm_page_t m)
 		}
 	}
 
+	/*
+	 * Clear the UNMANAGED flag when freeing an unmanaged page.
+	 */
+
+	if (m->flags & PG_UNMANAGED) {
+		m->flags &= ~PG_UNMANAGED;
+	} else {
 #ifdef __alpha__
-	pmap_page_is_free(m);
+		pmap_page_is_free(m);
 #endif
+	}
 
 	m->queue = PQ_FREE + m->pc;
 	pq = &vm_page_queues[m->queue];
@@ -1154,6 +1162,39 @@ vm_page_free_toq(vm_page_t m)
 	splx(s);
 }
 
+/*
+ * vm_page_unmanage:
+ *
+ *	Prevent PV management from being done on the page.  The page is
+ *	removed from the paging queues as if it were wired, and as a
+ *	consequence of no longer being managed the pageout daemon will not
+ *	touch it (since there is no way to locate the pte mappings for the
+ *	page).  madvise() calls that mess with the pmap will also no longer
+ *	operate on the page.
+ *
+ *	Beyond that the page is still reasonably 'normal'.  Freeing the page
+ *	will clear the flag.
+ *
+ *	This routine is used by OBJT_PHYS objects - objects using unswappable
+ *	physical memory as backing store rather than swap-backed memory and
+ *	will eventually be extended to support 4MB unmanaged physical
+ *	mappings.
+ */
+
+void
+vm_page_unmanage(vm_page_t m)
+{
+	int s;
+
+	s = splvm();
+	if ((m->flags & PG_UNMANAGED) == 0) {
+		if (m->wire_count == 0)
+			vm_page_unqueue(m);
+	}
+	vm_page_flag_set(m, PG_UNMANAGED);
+	splx(s);
+}
+
 /*
  * vm_page_wire:
  *
@@ -1170,9 +1211,15 @@ vm_page_wire(m)
 {
 	int s;
 
+	/*
+	 * Only bump the wire statistics if the page is not already wired,
+	 * and only unqueue the page if it is on some queue (if it is unmanaged
+	 * it is already off the queues).
+	 */
 	s = splvm();
 	if (m->wire_count == 0) {
-		vm_page_unqueue(m);
+		if ((m->flags & PG_UNMANAGED) == 0)
+			vm_page_unqueue(m);
 		cnt.v_wire_count++;
 	}
 	m->wire_count++;
@@ -1218,7 +1265,9 @@ vm_page_unwire(m, activate)
 		m->wire_count--;
 		if (m->wire_count == 0) {
 			cnt.v_wire_count--;
-			if (activate) {
+			if (m->flags & PG_UNMANAGED) {
+				;
+			} else if (activate) {
 				TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
 				m->queue = PQ_ACTIVE;
 				vm_page_queues[PQ_ACTIVE].lcnt++;
@@ -1259,7 +1308,7 @@ _vm_page_deactivate(vm_page_t m, int athead)
 		return;
 
 	s = splvm();
-	if (m->wire_count == 0) {
+	if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
 		if ((m->queue - m->pc) == PQ_CACHE)
 			cnt.v_reactivated++;
 		vm_page_unqueue(m);
@@ -1293,7 +1342,7 @@ vm_page_cache(m)
 {
 	int s;
 
-	if ((m->flags & PG_BUSY) || m->busy || m->wire_count) {
+	if ((m->flags & (PG_BUSY|PG_UNMANAGED)) || m->busy || m->wire_count) {
 		printf("vm_page_cache: attempting to cache busy page\n");
 		return;
 	}
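
To make the queue rules in the vm_page.c hunks above easier to follow, here is a small userland mock (all structures and helper names are invented for illustration; this is not the kernel API) of the PG_UNMANAGED life cycle: unmanaging a page takes it off the page queues, queue operations such as vm_page_activate() then ignore it, and freeing the page clears the flag again.

/*
 * Userspace mock of the PG_UNMANAGED queue rules added by this commit.
 * Everything here is a simplified stand-in for the real kernel state.
 */
#include <stdio.h>

#define MOCK_PG_UNMANAGED 0x0800

struct mock_page {
	int flags;
	int wire_count;
	int on_queue;		/* stand-in for "m->queue != PQ_NONE" */
};

static void
mock_unmanage(struct mock_page *m)
{
	/* vm_page_unmanage(): dequeue once, then mark the page unmanaged */
	if ((m->flags & MOCK_PG_UNMANAGED) == 0 && m->wire_count == 0)
		m->on_queue = 0;
	m->flags |= MOCK_PG_UNMANAGED;
}

static void
mock_activate(struct mock_page *m)
{
	/* vm_page_activate(): only queue unwired, managed pages */
	if (m->wire_count == 0 && (m->flags & MOCK_PG_UNMANAGED) == 0)
		m->on_queue = 1;
}

static void
mock_free(struct mock_page *m)
{
	/* vm_page_free_toq(): the UNMANAGED flag does not survive a free */
	m->flags &= ~MOCK_PG_UNMANAGED;
	m->on_queue = 0;
}

int
main(void)
{
	struct mock_page m = { 0, 0, 1 };

	mock_unmanage(&m);
	mock_activate(&m);	/* no effect: page is unmanaged */
	printf("unmanaged: flags=%#x on_queue=%d\n", m.flags, m.on_queue);

	mock_free(&m);
	printf("freed:     flags=%#x on_queue=%d\n", m.flags, m.on_queue);
	return (0);
}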
@@ -225,6 +225,13 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT];
  * These are the flags defined for vm_page.
  *
  * Note: PG_FILLED and PG_DIRTY are added for the filesystems.
+ *
+ * Note: PG_UNMANAGED (used by OBJT_PHYS) indicates that the page is
+ *	 not under PV management but otherwise should be treated as a
+ *	 normal page.  Pages not under PV management cannot be paged out
+ *	 via the object/vm_page_t because there is no knowledge of their
+ *	 pte mappings, nor can they be removed from their objects via
+ *	 the object, and such pages are also not on any PQ queue.
  */
 #define	PG_BUSY		0x0001	/* page is in transit (O) */
 #define	PG_WANTED	0x0002	/* someone is waiting for page (O) */
@@ -236,6 +243,7 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT];
 #define	PG_CLEANCHK	0x0100	/* page will be checked for cleaning */
 #define	PG_SWAPINPROG	0x0200	/* swap I/O in progress on page */
 #define	PG_NOSYNC	0x0400	/* do not collect for syncer */
+#define	PG_UNMANAGED	0x0800	/* No PV management for page */
 
 /*
  * Misc constants.
@@ -399,6 +407,7 @@ void vm_page_remove __P((vm_page_t));
 void vm_page_rename __P((vm_page_t, vm_object_t, vm_pindex_t));
 vm_offset_t vm_page_startup __P((vm_offset_t, vm_offset_t, vm_offset_t));
 vm_page_t vm_add_new_page __P((vm_offset_t pa));
+void vm_page_unmanage __P((vm_page_t));
 void vm_page_unwire __P((vm_page_t, int));
 void vm_page_wire __P((vm_page_t));
 void vm_page_unqueue __P((vm_page_t));
@@ -233,11 +233,12 @@ vm_pageout_clean(m)
 	 */
 
 	/*
-	 * Don't mess with the page if it's busy.
+	 * Don't mess with the page if it's busy, held, or special
 	 */
 	if ((m->hold_count != 0) ||
-	    ((m->busy != 0) || (m->flags & PG_BUSY)))
+	    ((m->busy != 0) || (m->flags & (PG_BUSY|PG_UNMANAGED)))) {
 		return 0;
+	}
 
 	mc[vm_pageout_page_count] = m;
 	pageout_count = 1;
@@ -279,7 +280,7 @@ more:
 			break;
 		}
 		if (((p->queue - p->pc) == PQ_CACHE) ||
-		    (p->flags & PG_BUSY) || p->busy) {
+		    (p->flags & (PG_BUSY|PG_UNMANAGED)) || p->busy) {
 			ib = 0;
 			break;
 		}
@@ -309,7 +310,7 @@ more:
 		if ((p = vm_page_lookup(object, pindex + is)) == NULL)
 			break;
 		if (((p->queue - p->pc) == PQ_CACHE) ||
-		    (p->flags & PG_BUSY) || p->busy) {
+		    (p->flags & (PG_BUSY|PG_UNMANAGED)) || p->busy) {
 			break;
 		}
 		vm_page_test_dirty(p);
@@ -474,7 +475,7 @@ vm_pageout_object_deactivate_pages(map, object, desired, map_remove_only)
 			if (p->wire_count != 0 ||
 			    p->hold_count != 0 ||
 			    p->busy != 0 ||
-			    (p->flags & PG_BUSY) ||
+			    (p->flags & (PG_BUSY|PG_UNMANAGED)) ||
 			    !pmap_page_exists(vm_map_pmap(map), p)) {
 				p = next;
 				continue;
@@ -1047,7 +1048,10 @@ rescan0:
 		m = vm_page_list_find(PQ_CACHE, cache_rover, FALSE);
 		if (!m)
 			break;
-		if ((m->flags & PG_BUSY) || m->busy || m->hold_count || m->wire_count) {
+		if ((m->flags & (PG_BUSY|PG_UNMANAGED)) ||
+		    m->busy ||
+		    m->hold_count ||
+		    m->wire_count) {
 #ifdef INVARIANTS
 			printf("Warning: busy page %p found in cache\n", m);
 #endif