freebsd-skq/sys/vm/device_pager.c
Commit 0d94caffca by David Greenman

These changes embody the support of the fully coherent merged VM buffer cache,
much higher filesystem I/O performance, and much better paging performance.
They represent the culmination of over six months of R&D.

The majority of the merged VM/cache work is by John Dyson.

The following highlights the most significant changes. Additionally, there are
(mostly minor) changes to the various filesystem modules (nfs, msdosfs, etc) to
support the new VM/buffer scheme.

vfs_bio.c:
Significant rewrite of most of vfs_bio to support the merged VM buffer cache
scheme.  The scheme is almost fully compatible with the old filesystem
interface.  Significant improvement in the number of opportunities for write
clustering.

vfs_cluster.c, vfs_subr.c
Upgrade and performance enhancements in vfs layer code to support merged
VM/buffer cache.  Fixup of vfs_cluster to eliminate the bogus pagemove stuff.

vm_object.c:
Yet more improvements in the collapse code.  Elimination of some windows that
can cause list corruption.

vm_pageout.c:
Fixed it; it really works better now.  Somehow in 2.0, some "enhancements"
broke the code.  This code has been reworked from the ground up.

vm_fault.c, vm_page.c, pmap.c, vm_object.c
Support for small-block filesystems with merged VM/buffer cache scheme.

pmap.c, vm_map.c
Dynamic kernel VM size; we no longer have to pre-allocate excessive numbers of
kernel PTs.

vm_glue.c
Much simpler and more effective swapping code.  No more gratuitous swapping.

proc.h
Fixed the problem that the p_lock flag was not being cleared on a fork.

swap_pager.c, vnode_pager.c
Removal of old vfs_bio cruft that supported the past pseudo-coherency.  The
code doesn't need it anymore.

machdep.c
Changes to better support the parameter values for the merged VM/buffer cache
scheme.

machdep.c, kern_exec.c, vm_glue.c
Implemented a separate submap for temporary exec string space and another one
to contain process upages. This eliminates all map fragmentation problems
that previously existed.

ffs_inode.c, ufs_inode.c, ufs_readwrite.c
Changes for merged VM/buffer cache.  Add "bypass" support for sneaking in on
busy buffers.

Submitted by:	John Dyson and David Greenman
1995-01-09 16:06:02 +00:00

/*
* Copyright (c) 1990 University of Utah.
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
* Science Department.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)device_pager.c 8.1 (Berkeley) 6/11/93
* $Id: device_pager.c,v 1.4 1994/10/02 17:48:58 phk Exp $
*/

/*
 * Page to/from special files.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/mman.h>
#include <sys/malloc.h>
#include <sys/proc.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/device_pager.h>

struct pagerlst dev_pager_list;         /* list of managed devices */
struct pglist dev_pager_fakelist;       /* list of available vm_page_t's */

#ifdef DEBUG
int dpagerdebug = 0;

#define DDB_FOLLOW      0x01
#define DDB_INIT        0x02
#define DDB_ALLOC       0x04
#define DDB_FAIL        0x08
#endif

static vm_pager_t dev_pager_alloc __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
static void dev_pager_dealloc __P((vm_pager_t));
static int dev_pager_getpage __P((vm_pager_t, vm_page_t, boolean_t));
static boolean_t dev_pager_haspage __P((vm_pager_t, vm_offset_t));
static void dev_pager_init __P((void));
static int dev_pager_putpage __P((vm_pager_t, vm_page_t, boolean_t));
static vm_page_t dev_pager_getfake __P((vm_offset_t));
static void dev_pager_putfake __P((vm_page_t));

struct pagerops devicepagerops = {
        dev_pager_init,
        dev_pager_alloc,
        dev_pager_dealloc,
        dev_pager_getpage,
        0,
        dev_pager_putpage,
        0,
        dev_pager_haspage
};

static void
dev_pager_init()
{
#ifdef DEBUG
        if (dpagerdebug & DDB_FOLLOW)
                printf("dev_pager_init()\n");
#endif
        TAILQ_INIT(&dev_pager_list);
        TAILQ_INIT(&dev_pager_fakelist);
}
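
/*
 * Create (or look up) a pager for the given device handle.  The device
 * must provide a d_mmap entry point, the offset must be page aligned, and
 * every page of the requested range must be mappable with the requested
 * protection; otherwise no pager is created.
 */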
static vm_pager_t
dev_pager_alloc(handle, size, prot, foff)
        caddr_t handle;
        vm_size_t size;
        vm_prot_t prot;
        vm_offset_t foff;
{
        dev_t dev;
        vm_pager_t pager;
        int (*mapfunc) ();
        vm_object_t object;
        dev_pager_t devp;
        unsigned int npages, off;

#ifdef DEBUG
        if (dpagerdebug & DDB_FOLLOW)
                printf("dev_pager_alloc(%x, %x, %x, %x)\n",
                    handle, size, prot, foff);
#endif
#ifdef DIAGNOSTIC
        /*
         * Pageout to device, should never happen.
         */
        if (handle == NULL)
                panic("dev_pager_alloc called");
#endif

        /*
         * Make sure this device can be mapped.
         */
        dev = (dev_t) (u_long) handle;
        mapfunc = cdevsw[major(dev)].d_mmap;
        if (mapfunc == NULL || mapfunc == enodev || mapfunc == nullop)
                return (NULL);

        /*
         * Offset should be page aligned.
         */
        if (foff & (PAGE_SIZE - 1))
                return (NULL);

        /*
         * Check that the specified range of the device allows the desired
         * protection.
         *
         * XXX assumes VM_PROT_* == PROT_*
         */
        npages = atop(round_page(size));
        for (off = foff; npages--; off += PAGE_SIZE)
                if ((*mapfunc) (dev, off, (int) prot) == -1)
                        return (NULL);

        /*
         * Look up pager, creating as necessary.
         */
top:
        pager = vm_pager_lookup(&dev_pager_list, handle);
        if (pager == NULL) {
                /*
                 * Allocate and initialize pager structs
                 */
                pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
                if (pager == NULL)
                        return (NULL);
                devp = (dev_pager_t) malloc(sizeof *devp, M_VMPGDATA, M_WAITOK);
                if (devp == NULL) {
                        free((caddr_t) pager, M_VMPAGER);
                        return (NULL);
                }
                pager->pg_handle = handle;
                pager->pg_ops = &devicepagerops;
                pager->pg_type = PG_DEVICE;
                pager->pg_data = (caddr_t) devp;
                TAILQ_INIT(&devp->devp_pglist);

                /*
                 * Allocate object and associate it with the pager.
                 */
                object = devp->devp_object = vm_object_allocate(0);
                vm_object_enter(object, pager);
                vm_object_setpager(object, pager, (vm_offset_t) foff, FALSE);

                /*
                 * Finally, put it on the managed list so others can find it.
                 * First we re-lookup in case someone else beat us to this
                 * point (due to blocking in the various mallocs). If so, we
                 * free everything and start over.
                 */
                if (vm_pager_lookup(&dev_pager_list, handle)) {
                        free((caddr_t) devp, M_VMPGDATA);
                        free((caddr_t) pager, M_VMPAGER);
                        goto top;
                }
                TAILQ_INSERT_TAIL(&dev_pager_list, pager, pg_list);
#ifdef DEBUG
                if (dpagerdebug & DDB_ALLOC) {
                        printf("dev_pager_alloc: pager %x devp %x object %x\n",
                            pager, devp, object);
                        vm_object_print(object, FALSE);
                }
#endif
        } else {
                /*
                 * vm_object_lookup() gains a reference and also removes the
                 * object from the cache.
                 */
                object = vm_object_lookup(pager);
#ifdef DIAGNOSTIC
                devp = (dev_pager_t) pager->pg_data;
                if (object != devp->devp_object)
                        panic("dev_pager_setup: bad object");
#endif
        }
        return (pager);
}
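
/*
 * Tear down a device pager: unlink it from the list of managed devices and
 * release the fake pages and per-device data allocated on its behalf.
 */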
static void
dev_pager_dealloc(pager)
        vm_pager_t pager;
{
        dev_pager_t devp;
        vm_object_t object;
        vm_page_t m;

#ifdef DEBUG
        if (dpagerdebug & DDB_FOLLOW)
                printf("dev_pager_dealloc(%x)\n", pager);
#endif
        TAILQ_REMOVE(&dev_pager_list, pager, pg_list);

        /*
         * Get the object. Note: cannot use vm_object_lookup since object has
         * already been removed from the hash chain.
         */
        devp = (dev_pager_t) pager->pg_data;
        object = devp->devp_object;
#ifdef DEBUG
        if (dpagerdebug & DDB_ALLOC)
                printf("dev_pager_dealloc: devp %x object %x\n", devp, object);
#endif

        /*
         * Free up our fake pages.
         */
        while ((m = devp->devp_pglist.tqh_first) != 0) {
                TAILQ_REMOVE(&devp->devp_pglist, m, pageq);
                dev_pager_putfake(m);
        }
        free((caddr_t) devp, M_VMPGDATA);
        free((caddr_t) pager, M_VMPAGER);
}
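
/*
 * Handle a fault on a device-backed object: ask the driver's d_mmap entry
 * point for the physical address backing this offset, then replace the
 * caller's page with a fictitious page that points at that address.
 */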
static int
dev_pager_getpage(pager, m, sync)
        vm_pager_t pager;
        vm_page_t m;
        boolean_t sync;
{
        register vm_object_t object;
        vm_offset_t offset, paddr;
        vm_page_t page;
        dev_t dev;
        int s;
        int (*mapfunc) (), prot;

#ifdef DEBUG
        if (dpagerdebug & DDB_FOLLOW)
                printf("dev_pager_getpage(%x, %x)\n", pager, m);
#endif
        object = m->object;
        dev = (dev_t) (u_long) pager->pg_handle;
        offset = m->offset + object->paging_offset;
        prot = PROT_READ;       /* XXX should pass in? */
        mapfunc = cdevsw[major(dev)].d_mmap;
        if (mapfunc == NULL || mapfunc == enodev || mapfunc == nullop)
                panic("dev_pager_getpage: no map function");

        paddr = pmap_phys_address((*mapfunc) ((dev_t) dev, (int) offset, prot));
#ifdef DIAGNOSTIC
        if (paddr == -1)
                panic("dev_pager_getpage: map function returns error");
#endif

        /*
         * Replace the passed in page with our own fake page and free up the
         * original.
         */
        page = dev_pager_getfake(paddr);
        TAILQ_INSERT_TAIL(&((dev_pager_t) pager->pg_data)->devp_pglist,
            page, pageq);
        vm_object_lock(object);
        vm_page_lock_queues();
        PAGE_WAKEUP(m);
        vm_page_free(m);
        vm_page_unlock_queues();
        s = splhigh();
        vm_page_insert(page, object, offset);
        splx(s);
        if (offset + PAGE_SIZE > object->size)
                object->size = offset + PAGE_SIZE;      /* XXX anal */
        vm_object_unlock(object);
        return (VM_PAGER_OK);
}
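
/*
 * Device memory is mapped in place and never paged out, so a putpage
 * request is a bug; anything other than a NULL pager draws a panic.
 */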
static int
dev_pager_putpage(pager, m, sync)
        vm_pager_t pager;
        vm_page_t m;
        boolean_t sync;
{
#ifdef DEBUG
        if (dpagerdebug & DDB_FOLLOW)
                printf("dev_pager_putpage(%x, %x)\n", pager, m);
#endif
        if (pager == NULL)
                return 0;
        panic("dev_pager_putpage called");
}
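
/*
 * dev_pager_alloc() has already verified that the entire range is mappable,
 * so every offset can simply be reported as present.
 */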
static boolean_t
dev_pager_haspage(pager, offset)
        vm_pager_t pager;
        vm_offset_t offset;
{
#ifdef DEBUG
        if (dpagerdebug & DDB_FOLLOW)
                printf("dev_pager_haspage(%x, %x)\n", pager, offset);
#endif
        return (TRUE);
}
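
/*
 * Hand out a fictitious vm_page describing the given physical address.
 * Fake pages are carved out of malloc'ed memory, two pages' worth of
 * vm_page structures at a time, and are recycled through
 * dev_pager_fakelist rather than freed.
 */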
static vm_page_t
dev_pager_getfake(paddr)
        vm_offset_t paddr;
{
        vm_page_t m;
        int i;

        if (dev_pager_fakelist.tqh_first == NULL) {
                m = (vm_page_t) malloc(PAGE_SIZE * 2, M_VMPGDATA, M_WAITOK);
                for (i = (PAGE_SIZE * 2) / sizeof(*m); i > 0; i--) {
                        TAILQ_INSERT_TAIL(&dev_pager_fakelist, m, pageq);
                        m++;
                }
        }
        m = dev_pager_fakelist.tqh_first;
        TAILQ_REMOVE(&dev_pager_fakelist, m, pageq);

        m->flags = PG_BUSY | PG_FICTITIOUS;
        m->dirty = 0;
        m->valid = VM_PAGE_BITS_ALL;
        m->busy = 0;
        m->bmapped = 0;
        m->wire_count = 1;
        m->phys_addr = paddr;

        return (m);
}
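
/*
 * Return a fake page to the free list for reuse.
 */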
static void
dev_pager_putfake(m)
        vm_page_t m;
{
#ifdef DIAGNOSTIC
        if (!(m->flags & PG_FICTITIOUS))
                panic("dev_pager_putfake: bad page");
#endif
        TAILQ_INSERT_TAIL(&dev_pager_fakelist, m, pageq);
}
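
The pager above is only reached through a driver's d_mmap entry point and a
user process's mmap() call. As a rough illustration (not part of the source
file), here is a minimal userland sketch, assuming a hypothetical
memory-mappable character device /dev/mydev: mapping the device file reaches
dev_pager_alloc(), and the first touch of the mapping faults through
dev_pager_getpage(), which asks the driver for the backing physical page.

#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        int fd;
        char *p;

        /* /dev/mydev is a stand-in for any mmap-capable character device. */
        fd = open("/dev/mydev", O_RDONLY);
        if (fd < 0) {
                perror("open");
                return (1);
        }

        /* Mapping the device creates a PG_DEVICE pager via dev_pager_alloc(). */
        p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED) {
                perror("mmap");
                close(fd);
                return (1);
        }

        /*
         * The first access faults; the VM system calls dev_pager_getpage(),
         * which installs a fictitious page backed by device memory.
         */
        printf("first byte: 0x%02x\n", (unsigned char)p[0]);

        munmap(p, 4096);
        close(fd);
        return (0);
}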