This is a cleanup patch to Peter's new OBJT_PHYS VM object type

and sysv shared memory support for it.  It implements a new
    PG_UNMANAGED flag that has slightly different characteristics
    from PG_FICTITIOUS.

    A new sysctl, kern.ipc.shm_use_phys has been added to enable the
    use of physically-backed sysv shared memory rather than swap-backed.
    Physically backed shm segments are not tracked with PV entries,
    allowing programs which use a large shm segment as a rendezvous
    point to operate without eating an insane amount of KVM in the
    PV entry management.  Read: Oracle.

    Peter's OBJT_PHYS object will also allow us to eventually implement
    page-table sharing and/or 4MB physical page support for such segments.
    We're halfway there.
This commit is contained in:
Matthew Dillon 2000-05-29 22:40:54 +00:00
parent b620c1f5d6
commit 8b03c8ed5e
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=61081
10 changed files with 118 additions and 47 deletions

View File

@ -2095,7 +2095,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
* raise IPL while manipulating pv_table since pmap_enter can be
* called at interrupt time.
*/
if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
if (pmap_initialized &&
(m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
pmap_insert_entry(pmap, va, mpte, m);
pa |= PG_MANAGED;
}
@ -2223,7 +2224,8 @@ pmap_enter_quick(pmap, va, m, mpte)
* raise IPL while manipulating pv_table since pmap_enter can be
* called at interrupt time.
*/
pmap_insert_entry(pmap, va, mpte, m);
if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
pmap_insert_entry(pmap, va, mpte, m);
/*
* Increment counters
@ -2235,7 +2237,10 @@ pmap_enter_quick(pmap, va, m, mpte)
/*
* Now validate mapping with RO protection
*/
*pte = pa | PG_V | PG_U | PG_MANAGED;
if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
*pte = pa | PG_V | PG_U;
else
*pte = pa | PG_V | PG_U | PG_MANAGED;
return mpte;
}

View File

@ -2095,7 +2095,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
* raise IPL while manipulating pv_table since pmap_enter can be
* called at interrupt time.
*/
if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
if (pmap_initialized &&
(m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
pmap_insert_entry(pmap, va, mpte, m);
pa |= PG_MANAGED;
}
@ -2223,7 +2224,8 @@ pmap_enter_quick(pmap, va, m, mpte)
* raise IPL while manipulating pv_table since pmap_enter can be
* called at interrupt time.
*/
pmap_insert_entry(pmap, va, mpte, m);
if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
pmap_insert_entry(pmap, va, mpte, m);
/*
* Increment counters
@ -2235,7 +2237,10 @@ pmap_enter_quick(pmap, va, m, mpte)
/*
* Now validate mapping with RO protection
*/
*pte = pa | PG_V | PG_U | PG_MANAGED;
if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
*pte = pa | PG_V | PG_U;
else
*pte = pa | PG_V | PG_U | PG_MANAGED;
return mpte;
}

View File

@ -126,12 +126,15 @@ struct shminfo shminfo = {
SHMALL
};
static int shm_use_phys;
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RD, &shminfo.shmmni, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RW, &shminfo.shmseg, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RW, &shm_use_phys, 0, "");
static int
shm_find_segment_by_key(key)
@ -528,13 +531,13 @@ shmget_allocate_segment(p, uap, mode)
* We make sure that we have allocated a pager before we need
* to.
*/
#ifdef SHM_PHYS_BACKED
shm_handle->shm_object =
vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
#else
shm_handle->shm_object =
vm_pager_allocate(OBJT_SWAP, 0, size, VM_PROT_DEFAULT, 0);
#endif
if (shm_use_phys) {
shm_handle->shm_object =
vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
} else {
shm_handle->shm_object =
vm_pager_allocate(OBJT_SWAP, 0, size, VM_PROT_DEFAULT, 0);
}
vm_object_clear_flag(shm_handle->shm_object, OBJ_ONEMAPPING);
vm_object_set_flag(shm_handle->shm_object, OBJ_NOSPLIT);

View File

@ -104,7 +104,9 @@ phys_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
object = vm_object_allocate(OBJT_PHYS,
OFF_TO_IDX(foff + size));
object->handle = handle;
#if 0
TAILQ_INIT(&object->un_pager.physp.physp_pglist);
#endif
TAILQ_INSERT_TAIL(&phys_pager_object_list, object,
pager_object_list);
} else {
@ -131,20 +133,6 @@ phys_pager_dealloc(object)
int s;
TAILQ_REMOVE(&phys_pager_object_list, object, pager_object_list);
/*
* Free up our fake pages.
*/
s = splvm();
while ((m = TAILQ_FIRST(&object->un_pager.physp.physp_pglist)) != 0) {
TAILQ_REMOVE(&object->un_pager.physp.physp_pglist, m, pageq);
/* return the page back to normal */
m->flags &= ~PG_FICTITIOUS;
m->dirty = 0;
vm_page_unwire(m, 0);
vm_page_flag_clear(m, PG_ZERO);
vm_page_free(m);
}
splx(s);
}
static int
@ -165,8 +153,7 @@ phys_pager_getpages(object, m, count, reqpage)
vm_page_zero_fill(m[i]);
vm_page_flag_set(m[i], PG_ZERO);
/* Switch off pv_entries */
vm_page_wire(m[i]);
vm_page_flag_set(m[i], PG_FICTITIOUS);
vm_page_unmanage(m[i]);
m[i]->valid = VM_PAGE_BITS_ALL;
m[i]->dirty = 0;
/* The requested page must remain busy, the others not. */
@ -174,8 +161,6 @@ phys_pager_getpages(object, m, count, reqpage)
vm_page_flag_clear(m[i], PG_BUSY);
m[i]->busy = 0;
}
TAILQ_INSERT_TAIL(&object->un_pager.physp.physp_pglist, m[i],
pageq);
}
splx(s);

View File

@ -423,7 +423,7 @@ RetryFault:;
if (mt == NULL || (mt->valid != VM_PAGE_BITS_ALL))
break;
if (mt->busy ||
(mt->flags & (PG_BUSY | PG_FICTITIOUS)) ||
(mt->flags & (PG_BUSY | PG_FICTITIOUS | PG_UNMANAGED)) ||
mt->hold_count ||
mt->wire_count)
continue;

View File

@ -833,12 +833,14 @@ vm_object_madvise(object, pindex, count, advise)
/*
* If the page is busy or not in a normal active state,
* we skip it. Things can break if we mess with pages
* in any of the below states.
* we skip it. If the page is not managed there are no
* page queues to mess with. Things can break if we mess
* with pages in any of the below states.
*/
if (
m->hold_count ||
m->wire_count ||
(m->flags & PG_UNMANAGED) ||
m->valid != VM_PAGE_BITS_ALL
) {
continue;
@ -1394,6 +1396,13 @@ vm_object_page_remove(object, start, end, clean_only)
all = ((end == 0) && (start == 0));
/*
* Since physically-backed objects do not use managed pages, we can't
* remove pages from the object (we must instead remove the page
* references, and then destroy the object).
*/
KASSERT(object->type != OBJT_PHYS, ("attempt to remove pages from a physical object"));
vm_object_pip_add(object, 1);
again:
size = end - start;

View File

@ -123,6 +123,7 @@ struct vm_object {
TAILQ_HEAD(, vm_page) devp_pglist;
} devp;
#if 0
/*
* Physmem pager
*
@ -131,6 +132,7 @@ struct vm_object {
struct {
TAILQ_HEAD(, vm_page) physp_pglist;
} physp;
#endif
/*
* Swap pager

View File

@ -688,7 +688,7 @@ vm_page_select_cache(object, pindex)
(pindex + object->pg_color) & PQ_L2_MASK,
FALSE
);
if (m && ((m->flags & PG_BUSY) || m->busy ||
if (m && ((m->flags & (PG_BUSY|PG_UNMANAGED)) || m->busy ||
m->hold_count || m->wire_count)) {
vm_page_deactivate(m);
continue;
@ -997,7 +997,7 @@ vm_page_activate(m)
vm_page_unqueue(m);
if (m->wire_count == 0) {
if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
m->queue = PQ_ACTIVE;
vm_page_queues[PQ_ACTIVE].lcnt++;
TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
@ -1128,9 +1128,17 @@ vm_page_free_toq(vm_page_t m)
}
}
/*
* Clear the UNMANAGED flag when freeing an unmanaged page.
*/
if (m->flags & PG_UNMANAGED) {
m->flags &= ~PG_UNMANAGED;
} else {
#ifdef __alpha__
pmap_page_is_free(m);
pmap_page_is_free(m);
#endif
}
m->queue = PQ_FREE + m->pc;
pq = &vm_page_queues[m->queue];
@ -1154,6 +1162,39 @@ vm_page_free_toq(vm_page_t m)
splx(s);
}
/*
 * vm_page_unmanage:
 *
 * Prevent PV management from being done on the page. The page is
 * removed from the paging queues as if it were wired, and as a
 * consequence of no longer being managed the pageout daemon will not
 * touch it (since there is no way to locate the pte mappings for the
 * page). madvise() calls that mess with the pmap will also no longer
 * operate on the page.
 *
 * Beyond that the page is still reasonably 'normal'. Freeing the page
 * will clear the flag.
 *
 * This routine is used by OBJT_PHYS objects - objects using unswappable
 * physical memory as backing store rather than swap-backed memory and
 * will eventually be extended to support 4MB unmanaged physical
 * mappings.
 *
 * Must be callable from interrupt-sensitive paths, hence the splvm()
 * protection around the queue manipulation. Idempotent: a page that is
 * already PG_UNMANAGED is left untouched except for re-setting the flag.
 */
void
vm_page_unmanage(vm_page_t m)
{
int s;
/* Block against the page-queue manipulations done at splvm. */
s = splvm();
if ((m->flags & PG_UNMANAGED) == 0) {
/* Wired pages are already off the paging queues; only unqueue others. */
if (m->wire_count == 0)
vm_page_unqueue(m);
}
vm_page_flag_set(m, PG_UNMANAGED);
splx(s);
}
/*
* vm_page_wire:
*
@ -1170,9 +1211,15 @@ vm_page_wire(m)
{
int s;
/*
* Only bump the wire statistics if the page is not already wired,
* and only unqueue the page if it is on some queue (if it is unmanaged
* it is already off the queues).
*/
s = splvm();
if (m->wire_count == 0) {
vm_page_unqueue(m);
if ((m->flags & PG_UNMANAGED) == 0)
vm_page_unqueue(m);
cnt.v_wire_count++;
}
m->wire_count++;
@ -1218,7 +1265,9 @@ vm_page_unwire(m, activate)
m->wire_count--;
if (m->wire_count == 0) {
cnt.v_wire_count--;
if (activate) {
if (m->flags & PG_UNMANAGED) {
;
} else if (activate) {
TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
m->queue = PQ_ACTIVE;
vm_page_queues[PQ_ACTIVE].lcnt++;
@ -1259,7 +1308,7 @@ _vm_page_deactivate(vm_page_t m, int athead)
return;
s = splvm();
if (m->wire_count == 0) {
if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
if ((m->queue - m->pc) == PQ_CACHE)
cnt.v_reactivated++;
vm_page_unqueue(m);
@ -1293,7 +1342,7 @@ vm_page_cache(m)
{
int s;
if ((m->flags & PG_BUSY) || m->busy || m->wire_count) {
if ((m->flags & (PG_BUSY|PG_UNMANAGED)) || m->busy || m->wire_count) {
printf("vm_page_cache: attempting to cache busy page\n");
return;
}

View File

@ -225,6 +225,13 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT];
* These are the flags defined for vm_page.
*
* Note: PG_FILLED and PG_DIRTY are added for the filesystems.
*
* Note: PG_UNMANAGED (used by OBJT_PHYS) indicates that the page is
* not under PV management but otherwise should be treated as a
* normal page. Pages not under PV management cannot be paged out
* via the object/vm_page_t because there is no knowledge of their
* pte mappings, nor can they be removed from their objects via
* the object, and such pages are also not on any PQ queue.
*/
#define PG_BUSY 0x0001 /* page is in transit (O) */
#define PG_WANTED 0x0002 /* someone is waiting for page (O) */
@ -236,6 +243,7 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT];
#define PG_CLEANCHK 0x0100 /* page will be checked for cleaning */
#define PG_SWAPINPROG 0x0200 /* swap I/O in progress on page */
#define PG_NOSYNC 0x0400 /* do not collect for syncer */
#define PG_UNMANAGED 0x0800 /* No PV management for page */
/*
* Misc constants.
@ -399,6 +407,7 @@ void vm_page_remove __P((vm_page_t));
void vm_page_rename __P((vm_page_t, vm_object_t, vm_pindex_t));
vm_offset_t vm_page_startup __P((vm_offset_t, vm_offset_t, vm_offset_t));
vm_page_t vm_add_new_page __P((vm_offset_t pa));
void vm_page_unmanage __P((vm_page_t));
void vm_page_unwire __P((vm_page_t, int));
void vm_page_wire __P((vm_page_t));
void vm_page_unqueue __P((vm_page_t));

View File

@ -233,11 +233,12 @@ vm_pageout_clean(m)
*/
/*
* Don't mess with the page if it's busy.
* Don't mess with the page if it's busy, held, or special
*/
if ((m->hold_count != 0) ||
((m->busy != 0) || (m->flags & PG_BUSY)))
((m->busy != 0) || (m->flags & (PG_BUSY|PG_UNMANAGED)))) {
return 0;
}
mc[vm_pageout_page_count] = m;
pageout_count = 1;
@ -279,7 +280,7 @@ vm_pageout_clean(m)
break;
}
if (((p->queue - p->pc) == PQ_CACHE) ||
(p->flags & PG_BUSY) || p->busy) {
(p->flags & (PG_BUSY|PG_UNMANAGED)) || p->busy) {
ib = 0;
break;
}
@ -309,7 +310,7 @@ vm_pageout_clean(m)
if ((p = vm_page_lookup(object, pindex + is)) == NULL)
break;
if (((p->queue - p->pc) == PQ_CACHE) ||
(p->flags & PG_BUSY) || p->busy) {
(p->flags & (PG_BUSY|PG_UNMANAGED)) || p->busy) {
break;
}
vm_page_test_dirty(p);
@ -474,7 +475,7 @@ vm_pageout_object_deactivate_pages(map, object, desired, map_remove_only)
if (p->wire_count != 0 ||
p->hold_count != 0 ||
p->busy != 0 ||
(p->flags & PG_BUSY) ||
(p->flags & (PG_BUSY|PG_UNMANAGED)) ||
!pmap_page_exists(vm_map_pmap(map), p)) {
p = next;
continue;
@ -1047,7 +1048,10 @@ vm_pageout_scan()
m = vm_page_list_find(PQ_CACHE, cache_rover, FALSE);
if (!m)
break;
if ((m->flags & PG_BUSY) || m->busy || m->hold_count || m->wire_count) {
if ((m->flags & (PG_BUSY|PG_UNMANAGED)) ||
m->busy ||
m->hold_count ||
m->wire_count) {
#ifdef INVARIANTS
printf("Warning: busy page %p found in cache\n", m);
#endif