This is a cleanup patch to Peter's new OBJT_PHYS VM object type
and sysv shared memory support for it.  It implements a new
    PG_UNMANAGED flag that has slightly different characteristics
    from PG_FICTITIOUS.

    A new sysctl, kern.ipc.shm_use_phys has been added to enable the
    use of physically-backed sysv shared memory rather than swap-backed.
    Physically backed shm segments are not tracked with PV entries,
    allowing programs which use a large shm segment as a rendezvous
    point to operate without eating an insane amount of KVM in the
    PV entry management.  Read: Oracle.

    Peter's OBJT_PHYS object will also allow us to eventually implement
    page-table sharing and/or 4MB physical page support for such segments.
    We're halfway there.
This commit is contained in:
Matthew Dillon 2000-05-29 22:40:54 +00:00
parent b620c1f5d6
commit 8b03c8ed5e
10 changed files with 118 additions and 47 deletions

View File

@ -2095,7 +2095,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
* raise IPL while manipulating pv_table since pmap_enter can be * raise IPL while manipulating pv_table since pmap_enter can be
* called at interrupt time. * called at interrupt time.
*/ */
if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) { if (pmap_initialized &&
(m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
pmap_insert_entry(pmap, va, mpte, m); pmap_insert_entry(pmap, va, mpte, m);
pa |= PG_MANAGED; pa |= PG_MANAGED;
} }
@ -2223,7 +2224,8 @@ retry:
* raise IPL while manipulating pv_table since pmap_enter can be * raise IPL while manipulating pv_table since pmap_enter can be
* called at interrupt time. * called at interrupt time.
*/ */
pmap_insert_entry(pmap, va, mpte, m); if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
pmap_insert_entry(pmap, va, mpte, m);
/* /*
* Increment counters * Increment counters
@ -2235,7 +2237,10 @@ retry:
/* /*
* Now validate mapping with RO protection * Now validate mapping with RO protection
*/ */
*pte = pa | PG_V | PG_U | PG_MANAGED; if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
*pte = pa | PG_V | PG_U;
else
*pte = pa | PG_V | PG_U | PG_MANAGED;
return mpte; return mpte;
} }

View File

@ -2095,7 +2095,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
* raise IPL while manipulating pv_table since pmap_enter can be * raise IPL while manipulating pv_table since pmap_enter can be
* called at interrupt time. * called at interrupt time.
*/ */
if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) { if (pmap_initialized &&
(m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
pmap_insert_entry(pmap, va, mpte, m); pmap_insert_entry(pmap, va, mpte, m);
pa |= PG_MANAGED; pa |= PG_MANAGED;
} }
@ -2223,7 +2224,8 @@ retry:
* raise IPL while manipulating pv_table since pmap_enter can be * raise IPL while manipulating pv_table since pmap_enter can be
* called at interrupt time. * called at interrupt time.
*/ */
pmap_insert_entry(pmap, va, mpte, m); if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
pmap_insert_entry(pmap, va, mpte, m);
/* /*
* Increment counters * Increment counters
@ -2235,7 +2237,10 @@ retry:
/* /*
* Now validate mapping with RO protection * Now validate mapping with RO protection
*/ */
*pte = pa | PG_V | PG_U | PG_MANAGED; if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
*pte = pa | PG_V | PG_U;
else
*pte = pa | PG_V | PG_U | PG_MANAGED;
return mpte; return mpte;
} }

View File

@ -126,12 +126,15 @@ struct shminfo shminfo = {
SHMALL SHMALL
}; };
static int shm_use_phys;
SYSCTL_DECL(_kern_ipc); SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RD, &shminfo.shmmni, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RD, &shminfo.shmmni, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RW, &shminfo.shmseg, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RW, &shminfo.shmseg, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RW, &shm_use_phys, 0, "");
static int static int
shm_find_segment_by_key(key) shm_find_segment_by_key(key)
@ -528,13 +531,13 @@ shmget_allocate_segment(p, uap, mode)
* We make sure that we have allocated a pager before we need * We make sure that we have allocated a pager before we need
* to. * to.
*/ */
#ifdef SHM_PHYS_BACKED if (shm_use_phys) {
shm_handle->shm_object = shm_handle->shm_object =
vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0); vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
#else } else {
shm_handle->shm_object = shm_handle->shm_object =
vm_pager_allocate(OBJT_SWAP, 0, size, VM_PROT_DEFAULT, 0); vm_pager_allocate(OBJT_SWAP, 0, size, VM_PROT_DEFAULT, 0);
#endif }
vm_object_clear_flag(shm_handle->shm_object, OBJ_ONEMAPPING); vm_object_clear_flag(shm_handle->shm_object, OBJ_ONEMAPPING);
vm_object_set_flag(shm_handle->shm_object, OBJ_NOSPLIT); vm_object_set_flag(shm_handle->shm_object, OBJ_NOSPLIT);

View File

@ -104,7 +104,9 @@ phys_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
object = vm_object_allocate(OBJT_PHYS, object = vm_object_allocate(OBJT_PHYS,
OFF_TO_IDX(foff + size)); OFF_TO_IDX(foff + size));
object->handle = handle; object->handle = handle;
#if 0
TAILQ_INIT(&object->un_pager.physp.physp_pglist); TAILQ_INIT(&object->un_pager.physp.physp_pglist);
#endif
TAILQ_INSERT_TAIL(&phys_pager_object_list, object, TAILQ_INSERT_TAIL(&phys_pager_object_list, object,
pager_object_list); pager_object_list);
} else { } else {
@ -131,20 +133,6 @@ phys_pager_dealloc(object)
int s; int s;
TAILQ_REMOVE(&phys_pager_object_list, object, pager_object_list); TAILQ_REMOVE(&phys_pager_object_list, object, pager_object_list);
/*
* Free up our fake pages.
*/
s = splvm();
while ((m = TAILQ_FIRST(&object->un_pager.physp.physp_pglist)) != 0) {
TAILQ_REMOVE(&object->un_pager.physp.physp_pglist, m, pageq);
/* return the page back to normal */
m->flags &= ~PG_FICTITIOUS;
m->dirty = 0;
vm_page_unwire(m, 0);
vm_page_flag_clear(m, PG_ZERO);
vm_page_free(m);
}
splx(s);
} }
static int static int
@ -165,8 +153,7 @@ phys_pager_getpages(object, m, count, reqpage)
vm_page_zero_fill(m[i]); vm_page_zero_fill(m[i]);
vm_page_flag_set(m[i], PG_ZERO); vm_page_flag_set(m[i], PG_ZERO);
/* Switch off pv_entries */ /* Switch off pv_entries */
vm_page_wire(m[i]); vm_page_unmanage(m[i]);
vm_page_flag_set(m[i], PG_FICTITIOUS);
m[i]->valid = VM_PAGE_BITS_ALL; m[i]->valid = VM_PAGE_BITS_ALL;
m[i]->dirty = 0; m[i]->dirty = 0;
/* The requested page must remain busy, the others not. */ /* The requested page must remain busy, the others not. */
@ -174,8 +161,6 @@ phys_pager_getpages(object, m, count, reqpage)
vm_page_flag_clear(m[i], PG_BUSY); vm_page_flag_clear(m[i], PG_BUSY);
m[i]->busy = 0; m[i]->busy = 0;
} }
TAILQ_INSERT_TAIL(&object->un_pager.physp.physp_pglist, m[i],
pageq);
} }
splx(s); splx(s);

View File

@ -423,7 +423,7 @@ readrest:
if (mt == NULL || (mt->valid != VM_PAGE_BITS_ALL)) if (mt == NULL || (mt->valid != VM_PAGE_BITS_ALL))
break; break;
if (mt->busy || if (mt->busy ||
(mt->flags & (PG_BUSY | PG_FICTITIOUS)) || (mt->flags & (PG_BUSY | PG_FICTITIOUS | PG_UNMANAGED)) ||
mt->hold_count || mt->hold_count ||
mt->wire_count) mt->wire_count)
continue; continue;

View File

@ -833,12 +833,14 @@ shadowlookup:
/* /*
* If the page is busy or not in a normal active state, * If the page is busy or not in a normal active state,
* we skip it. Things can break if we mess with pages * we skip it. If the page is not managed there are no
* in any of the below states. * page queues to mess with. Things can break if we mess
* with pages in any of the below states.
*/ */
if ( if (
m->hold_count || m->hold_count ||
m->wire_count || m->wire_count ||
(m->flags & PG_UNMANAGED) ||
m->valid != VM_PAGE_BITS_ALL m->valid != VM_PAGE_BITS_ALL
) { ) {
continue; continue;
@ -1394,6 +1396,13 @@ vm_object_page_remove(object, start, end, clean_only)
all = ((end == 0) && (start == 0)); all = ((end == 0) && (start == 0));
/*
* Since physically-backed objects do not use managed pages, we can't
* remove pages from the object (we must instead remove the page
* references, and then destroy the object).
*/
KASSERT(object->type != OBJT_PHYS, ("attempt to remove pages from a physical object"));
vm_object_pip_add(object, 1); vm_object_pip_add(object, 1);
again: again:
size = end - start; size = end - start;

View File

@ -123,6 +123,7 @@ struct vm_object {
TAILQ_HEAD(, vm_page) devp_pglist; TAILQ_HEAD(, vm_page) devp_pglist;
} devp; } devp;
#if 0
/* /*
* Physmem pager * Physmem pager
* *
@ -131,6 +132,7 @@ struct vm_object {
struct { struct {
TAILQ_HEAD(, vm_page) physp_pglist; TAILQ_HEAD(, vm_page) physp_pglist;
} physp; } physp;
#endif
/* /*
* Swap pager * Swap pager

View File

@ -688,7 +688,7 @@ vm_page_select_cache(object, pindex)
(pindex + object->pg_color) & PQ_L2_MASK, (pindex + object->pg_color) & PQ_L2_MASK,
FALSE FALSE
); );
if (m && ((m->flags & PG_BUSY) || m->busy || if (m && ((m->flags & (PG_BUSY|PG_UNMANAGED)) || m->busy ||
m->hold_count || m->wire_count)) { m->hold_count || m->wire_count)) {
vm_page_deactivate(m); vm_page_deactivate(m);
continue; continue;
@ -997,7 +997,7 @@ vm_page_activate(m)
vm_page_unqueue(m); vm_page_unqueue(m);
if (m->wire_count == 0) { if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
m->queue = PQ_ACTIVE; m->queue = PQ_ACTIVE;
vm_page_queues[PQ_ACTIVE].lcnt++; vm_page_queues[PQ_ACTIVE].lcnt++;
TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq); TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
@ -1128,9 +1128,17 @@ vm_page_free_toq(vm_page_t m)
} }
} }
/*
* Clear the UNMANAGED flag when freeing an unmanaged page.
*/
if (m->flags & PG_UNMANAGED) {
m->flags &= ~PG_UNMANAGED;
} else {
#ifdef __alpha__ #ifdef __alpha__
pmap_page_is_free(m); pmap_page_is_free(m);
#endif #endif
}
m->queue = PQ_FREE + m->pc; m->queue = PQ_FREE + m->pc;
pq = &vm_page_queues[m->queue]; pq = &vm_page_queues[m->queue];
@ -1154,6 +1162,39 @@ vm_page_free_toq(vm_page_t m)
splx(s); splx(s);
} }
/*
* vm_page_unmanage:
*
* Prevent PV management from being done on the page. The page is
* removed from the paging queues as if it were wired, and as a
* consequence of no longer being managed the pageout daemon will not
* touch it (since there is no way to locate the pte mappings for the
* page). madvise() calls that mess with the pmap will also no longer
* operate on the page.
*
* Beyond that the page is still reasonably 'normal'. Freeing the page
* will clear the flag.
*
* This routine is used by OBJT_PHYS objects - objects using unswappable
* physical memory as backing store rather then swap-backed memory and
* will eventually be extended to support 4MB unmanaged physical
* mappings.
*/
void
vm_page_unmanage(vm_page_t m)
{
int s;
s = splvm();
if ((m->flags & PG_UNMANAGED) == 0) {
if (m->wire_count == 0)
vm_page_unqueue(m);
}
vm_page_flag_set(m, PG_UNMANAGED);
splx(s);
}
/* /*
* vm_page_wire: * vm_page_wire:
* *
@ -1170,9 +1211,15 @@ vm_page_wire(m)
{ {
int s; int s;
/*
* Only bump the wire statistics if the page is not already wired,
* and only unqueue the page if it is on some queue (if it is unmanaged
* it is already off the queues).
*/
s = splvm(); s = splvm();
if (m->wire_count == 0) { if (m->wire_count == 0) {
vm_page_unqueue(m); if ((m->flags & PG_UNMANAGED) == 0)
vm_page_unqueue(m);
cnt.v_wire_count++; cnt.v_wire_count++;
} }
m->wire_count++; m->wire_count++;
@ -1218,7 +1265,9 @@ vm_page_unwire(m, activate)
m->wire_count--; m->wire_count--;
if (m->wire_count == 0) { if (m->wire_count == 0) {
cnt.v_wire_count--; cnt.v_wire_count--;
if (activate) { if (m->flags & PG_UNMANAGED) {
;
} else if (activate) {
TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq); TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
m->queue = PQ_ACTIVE; m->queue = PQ_ACTIVE;
vm_page_queues[PQ_ACTIVE].lcnt++; vm_page_queues[PQ_ACTIVE].lcnt++;
@ -1259,7 +1308,7 @@ _vm_page_deactivate(vm_page_t m, int athead)
return; return;
s = splvm(); s = splvm();
if (m->wire_count == 0) { if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
if ((m->queue - m->pc) == PQ_CACHE) if ((m->queue - m->pc) == PQ_CACHE)
cnt.v_reactivated++; cnt.v_reactivated++;
vm_page_unqueue(m); vm_page_unqueue(m);
@ -1293,7 +1342,7 @@ vm_page_cache(m)
{ {
int s; int s;
if ((m->flags & PG_BUSY) || m->busy || m->wire_count) { if ((m->flags & (PG_BUSY|PG_UNMANAGED)) || m->busy || m->wire_count) {
printf("vm_page_cache: attempting to cache busy page\n"); printf("vm_page_cache: attempting to cache busy page\n");
return; return;
} }

View File

@ -225,6 +225,13 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT];
* These are the flags defined for vm_page. * These are the flags defined for vm_page.
* *
* Note: PG_FILLED and PG_DIRTY are added for the filesystems. * Note: PG_FILLED and PG_DIRTY are added for the filesystems.
*
* Note: PG_UNMANAGED (used by OBJT_PHYS) indicates that the page is
* not under PV management but otherwise should be treated as a
* normal page. Pages not under PV management cannot be paged out
* via the object/vm_page_t because there is no knowledge of their
* pte mappings, nor can they be removed from their objects via
* the object, and such pages are also not on any PQ queue.
*/ */
#define PG_BUSY 0x0001 /* page is in transit (O) */ #define PG_BUSY 0x0001 /* page is in transit (O) */
#define PG_WANTED 0x0002 /* someone is waiting for page (O) */ #define PG_WANTED 0x0002 /* someone is waiting for page (O) */
@ -236,6 +243,7 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT];
#define PG_CLEANCHK 0x0100 /* page will be checked for cleaning */ #define PG_CLEANCHK 0x0100 /* page will be checked for cleaning */
#define PG_SWAPINPROG 0x0200 /* swap I/O in progress on page */ #define PG_SWAPINPROG 0x0200 /* swap I/O in progress on page */
#define PG_NOSYNC 0x0400 /* do not collect for syncer */ #define PG_NOSYNC 0x0400 /* do not collect for syncer */
#define PG_UNMANAGED 0x0800 /* No PV management for page */
/* /*
* Misc constants. * Misc constants.
@ -399,6 +407,7 @@ void vm_page_remove __P((vm_page_t));
void vm_page_rename __P((vm_page_t, vm_object_t, vm_pindex_t)); void vm_page_rename __P((vm_page_t, vm_object_t, vm_pindex_t));
vm_offset_t vm_page_startup __P((vm_offset_t, vm_offset_t, vm_offset_t)); vm_offset_t vm_page_startup __P((vm_offset_t, vm_offset_t, vm_offset_t));
vm_page_t vm_add_new_page __P((vm_offset_t pa)); vm_page_t vm_add_new_page __P((vm_offset_t pa));
void vm_page_unmanage __P((vm_page_t));
void vm_page_unwire __P((vm_page_t, int)); void vm_page_unwire __P((vm_page_t, int));
void vm_page_wire __P((vm_page_t)); void vm_page_wire __P((vm_page_t));
void vm_page_unqueue __P((vm_page_t)); void vm_page_unqueue __P((vm_page_t));

View File

@ -233,11 +233,12 @@ vm_pageout_clean(m)
*/ */
/* /*
* Don't mess with the page if it's busy. * Don't mess with the page if it's busy, held, or special
*/ */
if ((m->hold_count != 0) || if ((m->hold_count != 0) ||
((m->busy != 0) || (m->flags & PG_BUSY))) ((m->busy != 0) || (m->flags & (PG_BUSY|PG_UNMANAGED)))) {
return 0; return 0;
}
mc[vm_pageout_page_count] = m; mc[vm_pageout_page_count] = m;
pageout_count = 1; pageout_count = 1;
@ -279,7 +280,7 @@ more:
break; break;
} }
if (((p->queue - p->pc) == PQ_CACHE) || if (((p->queue - p->pc) == PQ_CACHE) ||
(p->flags & PG_BUSY) || p->busy) { (p->flags & (PG_BUSY|PG_UNMANAGED)) || p->busy) {
ib = 0; ib = 0;
break; break;
} }
@ -309,7 +310,7 @@ more:
if ((p = vm_page_lookup(object, pindex + is)) == NULL) if ((p = vm_page_lookup(object, pindex + is)) == NULL)
break; break;
if (((p->queue - p->pc) == PQ_CACHE) || if (((p->queue - p->pc) == PQ_CACHE) ||
(p->flags & PG_BUSY) || p->busy) { (p->flags & (PG_BUSY|PG_UNMANAGED)) || p->busy) {
break; break;
} }
vm_page_test_dirty(p); vm_page_test_dirty(p);
@ -474,7 +475,7 @@ vm_pageout_object_deactivate_pages(map, object, desired, map_remove_only)
if (p->wire_count != 0 || if (p->wire_count != 0 ||
p->hold_count != 0 || p->hold_count != 0 ||
p->busy != 0 || p->busy != 0 ||
(p->flags & PG_BUSY) || (p->flags & (PG_BUSY|PG_UNMANAGED)) ||
!pmap_page_exists(vm_map_pmap(map), p)) { !pmap_page_exists(vm_map_pmap(map), p)) {
p = next; p = next;
continue; continue;
@ -1047,7 +1048,10 @@ rescan0:
m = vm_page_list_find(PQ_CACHE, cache_rover, FALSE); m = vm_page_list_find(PQ_CACHE, cache_rover, FALSE);
if (!m) if (!m)
break; break;
if ((m->flags & PG_BUSY) || m->busy || m->hold_count || m->wire_count) { if ((m->flags & (PG_BUSY|PG_UNMANAGED)) ||
m->busy ||
m->hold_count ||
m->wire_count) {
#ifdef INVARIANTS #ifdef INVARIANTS
printf("Warning: busy page %p found in cache\n", m); printf("Warning: busy page %p found in cache\n", m);
#endif #endif