freebsd-dev/sys/vm/swap_pager.c
David Greenman 976e77fc9c 1) Some of the counters in the vmmeter struct don't fit well into the Mach VM
   scheme of things, so I've changed them to be more appropriate. Page ins/outs
   are now associated with the pager that did them. Nuked v_fault, as the
   only fault of interest that wouldn't already be counted in v_trap is a VM
   fault, and that is counted separately.
2) Implemented most of the remaining counters and corrected the counting of
   some that were done incorrectly. Almost all are correct now...just a few
   minor ones left to fix.
1994-10-15 13:33:09 +00:00

/*
* Copyright (c) 1994 John S. Dyson
* Copyright (c) 1990 University of Utah.
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
* Science Department.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
*
* @(#)swap_pager.c 8.9 (Berkeley) 3/21/94
* $Id: swap_pager.c,v 1.13 1994/10/14 12:26:17 davidg Exp $
*/
/*
* Quick hack to page to dedicated partition(s).
* TODO:
* Add multiprocessor locks
* Deal with async writes in a better fashion
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <miscfs/specfs/specdev.h>
#include <sys/rlist.h>
#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>
#ifndef NPENDINGIO
#define NPENDINGIO 16
#endif
int swap_pager_input __P((sw_pager_t, vm_page_t *, int, int));
int swap_pager_output __P((sw_pager_t, vm_page_t *, int, int, int *));
int nswiodone;
extern int vm_pageout_rate_limit;
static int cleandone;
extern int hz;
int swap_pager_full;
extern vm_map_t pager_map;
extern int vm_swap_size;
struct rlist *swaplist;
int nswaplist;
#define MAX_PAGEOUT_CLUSTER 8
TAILQ_HEAD(swpclean, swpagerclean);
typedef struct swpagerclean *swp_clean_t;
struct swpagerclean {
TAILQ_ENTRY(swpagerclean) spc_list;
int spc_flags;
struct buf *spc_bp;
sw_pager_t spc_swp;
vm_offset_t spc_kva;
int spc_count;
vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
} swcleanlist [NPENDINGIO] ;
extern vm_map_t kernel_map;
/* spc_flags values */
#define SPC_ERROR 0x01
#define SWB_EMPTY (-1)
struct swpclean swap_pager_done; /* list of completed page cleans */
struct swpclean swap_pager_inuse; /* list of pending page cleans */
struct swpclean swap_pager_free; /* list of free pager clean structs */
struct pagerlst swap_pager_list; /* list of "named" anon regions */
struct pagerlst swap_pager_un_list; /* list of "unnamed" anon pagers */
#define SWAP_FREE_NEEDED 0x1 /* need a swap block */
int swap_pager_needflags;
struct rlist *swapfrag;
struct pagerlst *swp_qs[]={
&swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0
};
int swap_pager_putmulti();
struct pagerops swappagerops = {
swap_pager_init,
swap_pager_alloc,
swap_pager_dealloc,
swap_pager_getpage,
swap_pager_getmulti,
swap_pager_putpage,
swap_pager_putmulti,
swap_pager_haspage
};
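/*
* Dispatch sketch (illustrative only, not part of this file): the
* generic pager layer reaches the routines below through this table,
* roughly as in
*
* (*pager->pg_ops->pgo_getpage)(pager, m, sync);
*
* so a vm_pager_t whose pg_ops points at swappagerops behaves as a
* swap pager. The pgo_* member names here are assumptions based on
* the usual 4.4BSD pagerops layout.
*/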
int npendingio = NPENDINGIO;
int pendingiowait;
int require_swap_init;
void swap_pager_finish();
int dmmin, dmmax;
extern int vm_page_count;
static inline void swapsizecheck() {
if( vm_swap_size < 128*btodb(PAGE_SIZE)) {
if( swap_pager_full)
printf("swap_pager: out of space\n");
swap_pager_full = 1;
} else if( vm_swap_size > 192*btodb(PAGE_SIZE))
swap_pager_full = 0;
}
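/*
* Worked example of the hysteresis above (illustrative only; assumes
* PAGE_SIZE == 4096 and DEV_BSIZE == 512, so btodb(PAGE_SIZE) == 8):
*
* low water: 128 * 8 = 1024 disk blocks (128 pages, 512K of swap)
* high water: 192 * 8 = 1536 disk blocks (192 pages, 768K of swap)
*
* swap_pager_full is raised when free swap drops below the low mark
* and is not cleared until free swap rises above the high mark, so
* the flag does not flap when free swap hovers near one threshold.
*/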
void
swap_pager_init()
{
dfltpagerops = &swappagerops;
TAILQ_INIT(&swap_pager_list);
TAILQ_INIT(&swap_pager_un_list);
/*
* Initialize clean lists
*/
TAILQ_INIT(&swap_pager_inuse);
TAILQ_INIT(&swap_pager_done);
TAILQ_INIT(&swap_pager_free);
require_swap_init = 1;
/*
* Calculate the swap allocation constants.
*/
dmmin = CLBYTES/DEV_BSIZE;
dmmax = btodb(SWB_NPAGES*PAGE_SIZE)*2;
}
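/*
* Worked example of the constants above (illustrative only; assumes
* CLBYTES == PAGE_SIZE == 4096, DEV_BSIZE == 512, SWB_NPAGES == 8):
*
* dmmin = 4096 / 512 = 8 disk blocks
* dmmax = btodb(8 * 4096) * 2 = 64 * 2 = 128 disk blocks
*
* dmmax is later used as the size of a swap "disk region": pages are
* clustered into a single I/O only when their block addresses fall in
* the same dmmax-sized region (see swap_pager_input).
*/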
/*
* Allocate a pager structure and associated resources.
* Note that if we are called from the pageout daemon (handle == NULL)
* we should not wait for memory, as doing so could result in deadlock.
*/
vm_pager_t
swap_pager_alloc(handle, size, prot, offset)
caddr_t handle;
register vm_size_t size;
vm_prot_t prot;
vm_offset_t offset;
{
register vm_pager_t pager;
register sw_pager_t swp;
int waitok;
int i,j;
if (require_swap_init) {
swp_clean_t spc;
struct buf *bp;
/*
* kva's are allocated here so that we don't need to keep
* doing kmem_alloc_pageable calls at runtime
*/
for (i = 0, spc = swcleanlist; i < npendingio ; i++, spc++) {
spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE*MAX_PAGEOUT_CLUSTER);
if (!spc->spc_kva) {
break;
}
spc->spc_bp = malloc( sizeof( *bp), M_TEMP, M_NOWAIT);
if (!spc->spc_bp) {
/* free the same cluster-sized kva that was allocated above */
kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE*MAX_PAGEOUT_CLUSTER);
break;
}
spc->spc_flags = 0;
TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
}
require_swap_init = 0;
if( size == 0)
return(NULL);
}
/*
* If this is a "named" anonymous region, look it up and
* return the appropriate pager if it exists.
*/
if (handle) {
pager = vm_pager_lookup(&swap_pager_list, handle);
if (pager != NULL) {
/*
* Use vm_object_lookup to gain a reference
* to the object and also to remove from the
* object cache.
*/
if (vm_object_lookup(pager) == NULL)
panic("swap_pager_alloc: bad object");
return(pager);
}
}
if (swap_pager_full) {
return(NULL);
}
/*
* Pager doesn't exist, allocate swap management resources
* and initialize.
*/
waitok = handle ? M_WAITOK : M_NOWAIT;
pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
if (pager == NULL)
return(NULL);
swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
if (swp == NULL) {
free((caddr_t)pager, M_VMPAGER);
return(NULL);
}
size = round_page(size);
swp->sw_osize = size;
swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES*PAGE_SIZE);
swp->sw_blocks = (sw_blk_t)
malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
M_VMPGDATA, waitok);
if (swp->sw_blocks == NULL) {
free((caddr_t)swp, M_VMPGDATA);
free((caddr_t)pager, M_VMPAGER);
return(NULL);
}
for (i = 0; i < swp->sw_nblocks; i++) {
swp->sw_blocks[i].swb_valid = 0;
swp->sw_blocks[i].swb_locked = 0;
for (j = 0; j < SWB_NPAGES; j++)
swp->sw_blocks[i].swb_block[j] = SWB_EMPTY;
}
swp->sw_poip = 0;
if (handle) {
vm_object_t object;
swp->sw_flags = SW_NAMED;
TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
/*
* Consistent with other pagers: return with object
* referenced. Can't do this with handle == NULL
* since it might be the pageout daemon calling.
*/
object = vm_object_allocate(size);
vm_object_enter(object, pager);
vm_object_setpager(object, pager, 0, FALSE);
} else {
swp->sw_flags = 0;
TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list);
}
pager->pg_handle = handle;
pager->pg_ops = &swappagerops;
pager->pg_type = PG_SWAP;
pager->pg_data = (caddr_t)swp;
return(pager);
}
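/*
* Usage sketch (illustrative only): the object layer normally gets
* here through the generic allocation routine, e.g.
*
* pager = vm_pager_allocate(PG_DFLT, handle, size, VM_PROT_ALL,
* (vm_offset_t) 0);
*
* which dispatches to swap_pager_alloc() via dfltpagerops. A NULL
* handle means the pageout daemon is calling, so the M_NOWAIT path
* above is taken and the caller must tolerate a NULL return.
*/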
/*
* returns the disk block associated with a pager and offset;
* additionally, as a side effect, returns a flag indicating
* whether the block has been written (is valid)
*/
static int *
swap_pager_diskaddr(swp, offset, valid)
sw_pager_t swp;
vm_offset_t offset;
int *valid;
{
register sw_blk_t swb;
int ix;
if (valid)
*valid = 0;
ix = offset / (SWB_NPAGES*PAGE_SIZE);
if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
return(NULL);
}
swb = &swp->sw_blocks[ix];
ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE;
if (valid)
*valid = swb->swb_valid & (1<<ix);
return &swb->swb_block[ix];
}
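/*
* Worked example of the lookup above (illustrative only; assumes
* SWB_NPAGES == 8 and PAGE_SIZE == 4096, so one sw_blk_t covers a
* 32K span of the object):
*
* offset = 184320 (0x2d000)
* block index: 184320 / 32768 = 5 -> swb = &swp->sw_blocks[5]
* page index: (184320 % 32768) / 4096 = 5
* returns &swb->swb_block[5]; *valid = swb->swb_valid & (1 << 5)
*/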
/*
* Utility routine to set the valid (written) bit for
* a block associated with a pager and offset
*/
static void
swap_pager_setvalid(swp, offset, valid)
sw_pager_t swp;
vm_offset_t offset;
int valid;
{
register sw_blk_t swb;
int ix;
ix = offset / (SWB_NPAGES*PAGE_SIZE);
if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks)
return;
swb = &swp->sw_blocks[ix];
ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE;
if (valid)
swb->swb_valid |= (1 << ix);
else
swb->swb_valid &= ~(1 << ix);
return;
}
/*
* this routine allocates swap space with a fragmentation
* minimization policy.
*/
int
swap_pager_getswapspace( unsigned amount, unsigned *rtval) {
#ifdef EXP
unsigned tmpalloc;
unsigned nblocksfrag = btodb(SWB_NPAGES*PAGE_SIZE);
if( amount < nblocksfrag) {
if( rlist_alloc(&swapfrag, amount, rtval))
return 1;
if( !rlist_alloc(&swaplist, nblocksfrag, &tmpalloc))
return 0;
rlist_free( &swapfrag, tmpalloc+amount, tmpalloc + nblocksfrag - 1);
*rtval = tmpalloc;
return 1;
}
#endif
if( !rlist_alloc(&swaplist, amount, rtval))
return 0;
else
return 1;
}
/*
* this routine frees swap space with a fragmentation
* minimization policy.
*/
void
swap_pager_freeswapspace( unsigned from, unsigned to) {
#ifdef EXP
unsigned nblocksfrag = btodb(SWB_NPAGES*PAGE_SIZE);
unsigned tmpalloc;
if( ((to + 1) - from) >= nblocksfrag) {
#endif
rlist_free(&swaplist, from, to);
#ifdef EXP
return;
}
rlist_free(&swapfrag, from, to);
while( rlist_alloc(&swapfrag, nblocksfrag, &tmpalloc)) {
rlist_free(&swaplist, tmpalloc, tmpalloc + nblocksfrag-1);
}
#endif
}
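/*
* Usage sketch (illustrative only, ignoring the #ifdef EXP paths):
* allocating and releasing one page worth of swap. rtval receives
* the first block of a contiguous run of "amount" DEV_BSIZE blocks.
*
* unsigned blk;
* if (swap_pager_getswapspace(btodb(PAGE_SIZE), &blk)) {
* ... the page now owns blocks blk through blk+btodb(PAGE_SIZE)-1 ...
* swap_pager_freeswapspace(blk, blk + btodb(PAGE_SIZE) - 1);
* }
*/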
/*
* this routine frees swap blocks from a specified pager
*/
void
_swap_pager_freespace(swp, start, size)
sw_pager_t swp;
vm_offset_t start;
vm_offset_t size;
{
vm_offset_t i;
int s;
s = splbio();
for (i = start; i < round_page(start + size - 1); i += PAGE_SIZE) {
int valid;
int *addr = swap_pager_diskaddr(swp, i, &valid);
if (addr && *addr != SWB_EMPTY) {
swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1);
if( valid) {
vm_swap_size += btodb(PAGE_SIZE);
swap_pager_setvalid(swp, i, 0);
}
*addr = SWB_EMPTY;
}
}
swapsizecheck();
splx(s);
}
void
swap_pager_freespace(pager, start, size)
vm_pager_t pager;
vm_offset_t start;
vm_offset_t size;
{
_swap_pager_freespace((sw_pager_t) pager->pg_data, start, size);
}
/*
* swap_pager_reclaim frees up over-allocated space from all pagers;
* this eliminates internal fragmentation due to allocation of space
* for segments that are never swapped to. It has been written so that
* it does not block until the rlist_free operation occurs; it keeps
* the queues consistent.
*/
/*
* Maximum number of blocks (pages) to reclaim per pass
*/
#define MAXRECLAIM 256
void
swap_pager_reclaim()
{
vm_pager_t p;
sw_pager_t swp;
int i, j, k;
int s;
int reclaimcount;
static int reclaims[MAXRECLAIM];
static int in_reclaim;
/*
* allow only one process to be in the swap_pager_reclaim subroutine
*/
s = splbio();
if (in_reclaim) {
tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0);
splx(s);
return;
}
in_reclaim = 1;
reclaimcount = 0;
/* for each pager queue */
for (k = 0; swp_qs[k]; k++) {
p = swp_qs[k]->tqh_first;
while (p && (reclaimcount < MAXRECLAIM)) {
/*
* see if any blocks associated with a pager have been
* allocated but not used (written)
*/
swp = (sw_pager_t) p->pg_data;
for (i = 0; i < swp->sw_nblocks; i++) {
sw_blk_t swb = &swp->sw_blocks[i];
if( swb->swb_locked)
continue;
for (j = 0; j < SWB_NPAGES; j++) {
if (swb->swb_block[j] != SWB_EMPTY &&
(swb->swb_valid & (1 << j)) == 0) {
reclaims[reclaimcount++] = swb->swb_block[j];
swb->swb_block[j] = SWB_EMPTY;
if (reclaimcount >= MAXRECLAIM)
goto rfinished;
}
}
}
p = p->pg_list.tqe_next;
}
}
rfinished:
/*
* free the blocks that have been added to the reclaim list
*/
for (i = 0; i < reclaimcount; i++) {
swap_pager_freeswapspace(reclaims[i], reclaims[i]+btodb(PAGE_SIZE) - 1);
swapsizecheck();
wakeup((caddr_t) &in_reclaim);
}
splx(s);
in_reclaim = 0;
wakeup((caddr_t) &in_reclaim);
}
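/*
* Concurrency sketch (illustrative only): in_reclaim plus the
* tsleep/wakeup pair implement a "single worker, late arrivals wait
* and return" pattern:
*
* if (in_reclaim) { tsleep(&in_reclaim, ...); return; }
* in_reclaim = 1;
* ... scan all pagers, batch blocks into reclaims[] ...
* in_reclaim = 0;
* wakeup(&in_reclaim);
*
* a process that finds a pass already running simply sleeps until it
* finishes, on the theory that the completed pass has just reclaimed
* whatever space there was to reclaim.
*/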
/*
* swap_pager_copy copies blocks from one pager to another and
* destroys the source pager
*/
void
swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset)
vm_pager_t srcpager;
vm_offset_t srcoffset;
vm_pager_t dstpager;
vm_offset_t dstoffset;
vm_offset_t offset;
{
sw_pager_t srcswp, dstswp;
vm_offset_t i;
int s;
srcswp = (sw_pager_t) srcpager->pg_data;
dstswp = (sw_pager_t) dstpager->pg_data;
/*
* remove the source pager from the swap_pager internal queue
*/
s = splbio();
if (srcswp->sw_flags & SW_NAMED) {
TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list);
srcswp->sw_flags &= ~SW_NAMED;
} else {
TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list);
}
while (srcswp->sw_poip) {
tsleep((caddr_t)srcswp, PVM, "spgout", 0);
}
splx(s);
/*
* clean all of the pages that are currently active and finished
*/
(void) swap_pager_clean();
s = splbio();
/*
* clear source block before destination object
* (release allocated space)
*/
for (i = 0; i < offset + srcoffset; i += PAGE_SIZE) {
int valid;
int *addr = swap_pager_diskaddr(srcswp, i, &valid);
if (addr && *addr != SWB_EMPTY) {
swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1);
if( valid)
vm_swap_size += btodb(PAGE_SIZE);
swapsizecheck();
*addr = SWB_EMPTY;
}
}
/*
* transfer source to destination
*/
for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) {
int srcvalid, dstvalid;
int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset,
&srcvalid);
int *dstaddrp;
/*
* see if the source has space allocated
*/
if (srcaddrp && *srcaddrp != SWB_EMPTY) {
/*
* if the source is valid and the dest has no space, then
* copy the allocation from the source to the dest.
*/
if (srcvalid) {
dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid);
/*
* if the dest already has a valid block, deallocate the
* source block without copying.
*/
if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
swap_pager_freeswapspace(*dstaddrp, *dstaddrp+btodb(PAGE_SIZE) - 1);
*dstaddrp = SWB_EMPTY;
}
if (dstaddrp && *dstaddrp == SWB_EMPTY) {
*dstaddrp = *srcaddrp;
*srcaddrp = SWB_EMPTY;
swap_pager_setvalid(dstswp, i + dstoffset, 1);
vm_swap_size -= btodb(PAGE_SIZE);
}
}
/*
* if the source is not empty at this point, then deallocate the space.
*/
if (*srcaddrp != SWB_EMPTY) {
swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1);
if( srcvalid)
vm_swap_size += btodb(PAGE_SIZE);
*srcaddrp = SWB_EMPTY;
}
}
}
/*
* deallocate the rest of the source object
*/
for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += PAGE_SIZE) {
int valid;
int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid);
if (srcaddrp && *srcaddrp != SWB_EMPTY) {
swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1);
if( valid)
vm_swap_size += btodb(PAGE_SIZE);
*srcaddrp = SWB_EMPTY;
}
}
swapsizecheck();
splx(s);
free((caddr_t)srcswp->sw_blocks, M_VMPGDATA);
srcswp->sw_blocks = 0;
free((caddr_t)srcswp, M_VMPGDATA);
srcpager->pg_data = 0;
free((caddr_t)srcpager, M_VMPAGER);
return;
}
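/*
* Case summary for the transfer loop above (illustrative):
*
* source block valid, dest invalid: move the block to the dest
* and mark the dest page valid
* source block valid, dest valid: keep the dest copy and free
* the source block
* source allocated but not valid: free the source block
* source empty: nothing to do
*
* in every case the source ends up SWB_EMPTY, so the final cleanup
* loop only has to cover offsets past the destination's size.
*/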
void
swap_pager_dealloc(pager)
vm_pager_t pager;
{
register int i,j;
register sw_blk_t bp;
register sw_pager_t swp;
int s;
/*
* Remove from list right away so lookups will fail if we
* block for pageout completion.
*/
s = splbio();
swp = (sw_pager_t) pager->pg_data;
if (swp->sw_flags & SW_NAMED) {
TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
swp->sw_flags &= ~SW_NAMED;
} else {
TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list);
}
/*
* Wait for all pageouts to finish and remove
* all entries from cleaning list.
*/
while (swp->sw_poip) {
tsleep((caddr_t)swp, PVM, "swpout", 0);
}
splx(s);
(void) swap_pager_clean();
/*
* Free left over swap blocks
*/
s = splbio();
for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) {
for (j = 0; j < SWB_NPAGES; j++)
if (bp->swb_block[j] != SWB_EMPTY) {
swap_pager_freeswapspace((unsigned)bp->swb_block[j],
(unsigned)bp->swb_block[j] + btodb(PAGE_SIZE) - 1);
if( bp->swb_valid & (1<<j))
vm_swap_size += btodb(PAGE_SIZE);
bp->swb_block[j] = SWB_EMPTY;
}
}
splx(s);
swapsizecheck();
/*
* Free swap management resources
*/
free((caddr_t)swp->sw_blocks, M_VMPGDATA);
swp->sw_blocks = 0;
free((caddr_t)swp, M_VMPGDATA);
pager->pg_data = 0;
free((caddr_t)pager, M_VMPAGER);
}
/*
* swap_pager_getmulti can get multiple pages.
*/
int
swap_pager_getmulti(pager, m, count, reqpage, sync)
vm_pager_t pager;
vm_page_t *m;
int count;
int reqpage;
boolean_t sync;
{
if( reqpage >= count)
panic("swap_pager_getmulti: reqpage >= count\n");
return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage);
}
/*
* swap_pager_getpage gets individual pages
*/
int
swap_pager_getpage(pager, m, sync)
vm_pager_t pager;
vm_page_t m;
boolean_t sync;
{
vm_page_t marray[1];
marray[0] = m;
return swap_pager_input((sw_pager_t)pager->pg_data, marray, 1, 0);
}
int
swap_pager_putmulti(pager, m, c, sync, rtvals)
vm_pager_t pager;
vm_page_t *m;
int c;
boolean_t sync;
int *rtvals;
{
int flags;
if (pager == NULL) {
(void) swap_pager_clean();
return VM_PAGER_OK;
}
flags = B_WRITE;
if (!sync)
flags |= B_ASYNC;
return swap_pager_output((sw_pager_t)pager->pg_data, m, c, flags, rtvals);
}
/*
* swap_pager_putpage writes individual pages
*/
int
swap_pager_putpage(pager, m, sync)
vm_pager_t pager;
vm_page_t m;
boolean_t sync;
{
int flags;
vm_page_t marray[1];
int rtvals[1];
if (pager == NULL) {
(void) swap_pager_clean();
return VM_PAGER_OK;
}
marray[0] = m;
flags = B_WRITE;
if (!sync)
flags |= B_ASYNC;
swap_pager_output((sw_pager_t)pager->pg_data, marray, 1, flags, rtvals);
return rtvals[0];
}
static inline int
const swap_pager_block_index(swp, offset)
sw_pager_t swp;
vm_offset_t offset;
{
return (offset / (SWB_NPAGES*PAGE_SIZE));
}
static inline int
const swap_pager_block_offset(swp, offset)
sw_pager_t swp;
vm_offset_t offset;
{
return ((offset % (PAGE_SIZE*SWB_NPAGES)) / PAGE_SIZE);
}
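/*
* Illustrative identity (assumes SWB_NPAGES == 8, PAGE_SIZE == 4096):
* for any page-aligned offset,
*
* offset == swap_pager_block_index(swp, offset) * (SWB_NPAGES*PAGE_SIZE)
* + swap_pager_block_offset(swp, offset) * PAGE_SIZE
*
* i.e. the two helpers decompose an object offset into (block, page)
* coordinates within the sw_blocks[] array.
*/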
/*
* _swap_pager_haspage returns TRUE if the pager has data that has
* been written out.
*/
static boolean_t
_swap_pager_haspage(swp, offset)
sw_pager_t swp;
vm_offset_t offset;
{
register sw_blk_t swb;
int ix;
ix = offset / (SWB_NPAGES*PAGE_SIZE);
if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
return(FALSE);
}
swb = &swp->sw_blocks[ix];
ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE;
if (swb->swb_block[ix] != SWB_EMPTY) {
if (swb->swb_valid & (1 << ix))
return TRUE;
}
return(FALSE);
}
/*
* swap_pager_haspage is the externally accessible version of
* _swap_pager_haspage above. This routine takes a vm_pager_t
* as an argument instead of a sw_pager_t.
*/
boolean_t
swap_pager_haspage(pager, offset)
vm_pager_t pager;
vm_offset_t offset;
{
return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset);
}
/*
* swap_pager_freepage is a convenience routine that clears the busy
* bit and deallocates a page.
*/
static void
swap_pager_freepage(m)
vm_page_t m;
{
PAGE_WAKEUP(m);
vm_page_free(m);
}
/*
* swap_pager_ridpages is a convenience routine that deallocates all
* but the required page. This is usually used in error returns that
* need to invalidate the "extra" readahead pages.
*/
static void
swap_pager_ridpages(m, count, reqpage)
vm_page_t *m;
int count;
int reqpage;
{
int i;
for (i = 0; i < count; i++)
if (i != reqpage)
swap_pager_freepage(m[i]);
}
int swapwritecount=0;
/*
* swap_pager_iodone1 is the completion routine for both reads and async writes
*/
void
swap_pager_iodone1(bp)
struct buf *bp;
{
bp->b_flags |= B_DONE;
bp->b_flags &= ~B_ASYNC;
wakeup((caddr_t)bp);
/*
if ((bp->b_flags & B_READ) == 0)
vwakeup(bp);
*/
}
int
swap_pager_input(swp, m, count, reqpage)
register sw_pager_t swp;
vm_page_t *m;
int count, reqpage;
{
register struct buf *bp;
sw_blk_t swb[count];
register int s;
int i;
boolean_t rv;
vm_offset_t kva, off[count];
swp_clean_t spc;
vm_offset_t paging_offset;
vm_object_t object;
int reqaddr[count];
int first, last;
int failed;
int reqdskregion;
object = m[reqpage]->object;
paging_offset = object->paging_offset;
/*
* First determine if the page exists in the pager if this is
* a sync read. This quickly handles cases where we are
* following shadow chains looking for the top level object
* with the page.
*/
if (swp->sw_blocks == NULL) {
swap_pager_ridpages(m, count, reqpage);
return(VM_PAGER_FAIL);
}
for(i = 0; i < count; i++) {
vm_offset_t foff = m[i]->offset + paging_offset;
int ix = swap_pager_block_index(swp, foff);
if (ix >= swp->sw_nblocks) {
int j;
if( i <= reqpage) {
swap_pager_ridpages(m, count, reqpage);
return(VM_PAGER_FAIL);
}
for(j = i; j < count; j++) {
swap_pager_freepage(m[j]);
}
count = i;
break;
}
swb[i] = &swp->sw_blocks[ix];
off[i] = swap_pager_block_offset(swp, foff);
reqaddr[i] = swb[i]->swb_block[off[i]];
}
/* make sure that the block backing our required page actually exists */
if (reqaddr[reqpage] == SWB_EMPTY ||
(swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
swap_pager_ridpages(m, count, reqpage);
return(VM_PAGER_FAIL);
}
reqdskregion = reqaddr[reqpage] / dmmax;
/*
* search backwards for the first contiguous page to transfer
*/
failed = 0;
first = 0;
for (i = reqpage - 1; i >= 0; --i) {
if ( failed || (reqaddr[i] == SWB_EMPTY) ||
(swb[i]->swb_valid & (1 << off[i])) == 0 ||
(reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
((reqaddr[i] / dmmax) != reqdskregion)) {
failed = 1;
swap_pager_freepage(m[i]);
if (first == 0)
first = i + 1;
}
}
/*
* search forwards for the last contiguous page to transfer
*/
failed = 0;
last = count;
for (i = reqpage + 1; i < count; i++) {
if ( failed || (reqaddr[i] == SWB_EMPTY) ||
(swb[i]->swb_valid & (1 << off[i])) == 0 ||
(reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
((reqaddr[i] / dmmax) != reqdskregion)) {
failed = 1;
swap_pager_freepage(m[i]);
if (last == count)
last = i;
}
}
count = last;
if (first != 0) {
for (i = first; i < count; i++) {
m[i-first] = m[i];
reqaddr[i-first] = reqaddr[i];
off[i-first] = off[i];
}
count -= first;
reqpage -= first;
}
++swb[reqpage]->swb_locked;
/*
* at this point:
* "m" is a pointer to the array of vm_page_t for paging I/O
* "count" is the number of vm_page_t entries represented by "m"
* "object" is the vm_object_t for I/O
* "reqpage" is the index into "m" for the page actually faulted
*/
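/*
* Clustering example for the two scans above (illustrative only;
* assumes btodb(PAGE_SIZE) == 8). With count == 5, reqpage == 2 and
*
* reqaddr[] = { 96, SWB_EMPTY, 112, 120, 999 }
*
* the backward scan frees page 1 (empty) and then page 0, leaving
* first == 2; the forward scan keeps page 3 (120 == 112 + 8) but
* frees page 4 (wrong address), leaving last == 4. After the shift,
* m[] holds the two pages at blocks 112 and 120, count == 2 and
* reqpage == 0.
*/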
spc = NULL; /* we might not use an spc data structure */
if (count == 1) {
/*
* if a kva has not been allocated, we can only do a one page transfer,
* so we free the other pages that might have been allocated by
* vm_fault.
*/
swap_pager_ridpages(m, count, reqpage);
m[0] = m[reqpage];
reqaddr[0] = reqaddr[reqpage];
count = 1;
reqpage = 0;
/*
* get a swap pager clean data structure, block until we get it
*/
if (swap_pager_free.tqh_first == NULL) {
s = splbio();
if( curproc == pageproc)
(void) swap_pager_clean();
else
wakeup((caddr_t) &vm_pages_needed);
while (swap_pager_free.tqh_first == NULL) {
swap_pager_needflags |= SWAP_FREE_NEEDED;
tsleep((caddr_t)&swap_pager_free,
PVM, "swpfre", 0);
if( curproc == pageproc)
(void) swap_pager_clean();
else
wakeup((caddr_t) &vm_pages_needed);
}
splx(s);
}
spc = swap_pager_free.tqh_first;
TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
kva = spc->spc_kva;
bp = spc->spc_bp;
bzero(bp, sizeof *bp);
bp->b_spc = spc;
} else {
/*
* Get a swap buffer header to perform the IO
*/
bp = getpbuf();
kva = (vm_offset_t) bp->b_data;
}
/*
* map our page(s) into kva for input
*/
pmap_qenter( kva, m, count);
s = splbio();
bp->b_flags = B_BUSY | B_READ | B_CALL;
bp->b_iodone = swap_pager_iodone1;
bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
crhold(bp->b_rcred);
crhold(bp->b_wcred);
bp->b_un.b_addr = (caddr_t) kva;
bp->b_blkno = reqaddr[0];
bp->b_bcount = PAGE_SIZE*count;
bp->b_bufsize = PAGE_SIZE*count;
bgetvp( swapdev_vp, bp);
swp->sw_piip++;
cnt.v_swapin++;
cnt.v_swappgsin += count;
/*
* perform the I/O
*/
VOP_STRATEGY(bp);
/*
* wait for the sync I/O to complete
*/
while ((bp->b_flags & B_DONE) == 0) {
tsleep((caddr_t)bp, PVM, "swread", 0);
}
rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE);
--swp->sw_piip;
if (swp->sw_piip == 0)
wakeup((caddr_t) swp);
/*
* relpbuf does this, but we maintain our own buffer
* list also...
*/
if (bp->b_vp)
brelvp(bp);
splx(s);
--swb[reqpage]->swb_locked;
/*
* remove the mapping for kernel virtual
*/
pmap_qremove( kva, count);
if (spc) {
/*
* if we have used an spc, we need to free it.
*/
if( bp->b_rcred != NOCRED)
crfree(bp->b_rcred);
if( bp->b_wcred != NOCRED)
crfree(bp->b_wcred);
TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
if (swap_pager_needflags & SWAP_FREE_NEEDED) {
swap_pager_needflags &= ~SWAP_FREE_NEEDED;
wakeup((caddr_t)&swap_pager_free);
}
} else {
/*
* release the physical I/O buffer
*/
relpbuf(bp);
/*
* finish up input if everything is ok
*/
if( rv == VM_PAGER_OK) {
for (i = 0; i < count; i++) {
pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
m[i]->flags |= PG_CLEAN;
m[i]->flags &= ~PG_LAUNDRY;
if (i != reqpage) {
/*
* whether or not to leave the page activated
* is up in the air, but we should put the page
* on a page queue somewhere. (it already is in
* the object).
* After some empirical results, it is best
* to deactivate the readahead pages.
*/
vm_page_deactivate(m[i]);
/*
* just in case someone was asking for this
* page we now tell them that it is ok to use
*/
m[i]->flags &= ~PG_FAKE;
PAGE_WAKEUP(m[i]);
}
}
/*
* If we're out of swap space, then attempt to free
* some whenever pages are brought in. We must clear
* the clean flag so that the page contents will be
* preserved.
*/
if (swap_pager_full) {
for (i = 0; i < count; i++) {
m[i]->flags &= ~PG_CLEAN;
}
_swap_pager_freespace( swp, m[0]->offset+paging_offset, count*PAGE_SIZE);
}
} else {
swap_pager_ridpages(m, count, reqpage);
}
}
return(rv);
}
int
swap_pager_output(swp, m, count, flags, rtvals)
register sw_pager_t swp;
vm_page_t *m;
int count;
int flags;
int *rtvals;
{
register struct buf *bp;
sw_blk_t swb[count];
register int s;
int i, j, ix;
boolean_t rv;
vm_offset_t kva, off, foff;
swp_clean_t spc;
vm_offset_t paging_offset;
vm_object_t object;
int reqaddr[count];
int failed;
/*
if( count > 1)
printf("off: 0x%x, count: %d\n", m[0]->offset, count);
*/
spc = NULL;
object = m[0]->object;
paging_offset = object->paging_offset;
failed = 0;
for(j=0;j<count;j++) {
foff = m[j]->offset + paging_offset;
ix = swap_pager_block_index(swp, foff);
swb[j] = 0;
if( swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
rtvals[j] = VM_PAGER_FAIL;
failed = 1;
continue;
} else {
rtvals[j] = VM_PAGER_OK;
}
swb[j] = &swp->sw_blocks[ix];
++swb[j]->swb_locked;
if( failed) {
rtvals[j] = VM_PAGER_FAIL;
continue;
}
off = swap_pager_block_offset(swp, foff);
reqaddr[j] = swb[j]->swb_block[off];
if( reqaddr[j] == SWB_EMPTY) {
int blk;
int tries;
int ntoget;
tries = 0;
s = splbio();
/*
* if any other pages have been allocated in this block, we
* only try to get one page.
*/
for (i = 0; i < SWB_NPAGES; i++) {
if (swb[j]->swb_block[i] != SWB_EMPTY)
break;
}
ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
/*
* this code is a little conservative, but works
* (the intent of this code is to allocate small chunks
* for small objects)
*/
if( (m[j]->offset == 0) && (ntoget*PAGE_SIZE > object->size)) {
ntoget = (object->size + (PAGE_SIZE-1))/PAGE_SIZE;
}
retrygetspace:
if (!swap_pager_full && ntoget > 1 &&
swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) {
for (i = 0; i < ntoget; i++) {
swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
swb[j]->swb_valid = 0;
}
reqaddr[j] = swb[j]->swb_block[off];
} else if (!swap_pager_getswapspace(btodb(PAGE_SIZE),
&swb[j]->swb_block[off])) {
/*
* if the allocation has failed, we try to reclaim space and
* retry.
*/
if (++tries == 1) {
swap_pager_reclaim();
goto retrygetspace;
}
rtvals[j] = VM_PAGER_AGAIN;
failed = 1;
} else {
reqaddr[j] = swb[j]->swb_block[off];
swb[j]->swb_valid &= ~(1<<off);
}
splx(s);
}
}
/*
* search forwards for the last contiguous page to transfer
*/
failed = 0;
for (i = 0; i < count; i++) {
if( failed || (reqaddr[i] != reqaddr[0] + i*btodb(PAGE_SIZE)) ||
(reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) ||
(rtvals[i] != VM_PAGER_OK)) {
failed = 1;
if( rtvals[i] == VM_PAGER_OK)
rtvals[i] = VM_PAGER_AGAIN;
}
}
for(i = 0; i < count; i++) {
if( rtvals[i] != VM_PAGER_OK) {
if( swb[i])
--swb[i]->swb_locked;
}
}
for(i = 0; i < count; i++)
if( rtvals[i] != VM_PAGER_OK)
break;
if( i == 0) {
return VM_PAGER_AGAIN;
}
count = i;
for(i=0;i<count;i++) {
if( reqaddr[i] == SWB_EMPTY)
printf("I/O to empty block????\n");
}
/*
* For synchronous writes, we clean up
* all completed async pageouts.
*/
if ((flags & B_ASYNC) == 0) {
swap_pager_clean();
}
kva = 0;
/*
* we allocate a new kva for transfers > 1 page
* but for transfers == 1 page, the swap_pager_free list contains
* entries that have pre-allocated kva's (for efficiency).
* NOTE -- we do not use the physical buffer pool or the
* preallocated associated kva's because of the potential for
* deadlock. This is very subtle -- but deadlocks or resource
* contention must be avoided on pageouts -- or your system will
* sleep (forever) !!!
*/
/*
if ( count > 1) {
kva = kmem_alloc_pageable(pager_map, count*PAGE_SIZE);
if( !kva) {
for (i = 0; i < count; i++) {
if( swb[i])
--swb[i]->swb_locked;
rtvals[i] = VM_PAGER_AGAIN;
}
return VM_PAGER_AGAIN;
}
}
*/
/*
* get a swap pager clean data structure, block until we get it
*/
if (swap_pager_free.tqh_first == NULL) {
s = splbio();
if( curproc == pageproc)
(void) swap_pager_clean();
else
wakeup((caddr_t) &vm_pages_needed);
while (swap_pager_free.tqh_first == NULL) {
swap_pager_needflags |= SWAP_FREE_NEEDED;
tsleep((caddr_t)&swap_pager_free,
PVM, "swpfre", 0);
if( curproc == pageproc)
(void) swap_pager_clean();
else
wakeup((caddr_t) &vm_pages_needed);
}
splx(s);
}
spc = swap_pager_free.tqh_first;
TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
kva = spc->spc_kva;
/*
* map our page(s) into kva for I/O
*/
pmap_qenter(kva, m, count);
/*
* mark each page's swap block valid; if the valid bit is being set
* for the first time, charge the free swap accounting
*/
for(i=0;i<count;i++) {
foff = m[i]->offset + paging_offset;
off = swap_pager_block_offset(swp, foff);
/*
* if we are setting the valid bit anew,
* then diminish the swap free space
*/
if( (swb[i]->swb_valid & (1 << off)) == 0)
vm_swap_size -= btodb(PAGE_SIZE);
/*
* set the valid bit
*/
swb[i]->swb_valid |= (1 << off);
/*
* and unlock the data structure
*/
--swb[i]->swb_locked;
}
s = splbio();
/*
* Get a swap buffer header and perform the IO
*/
bp = spc->spc_bp;
bzero(bp, sizeof *bp);
bp->b_spc = spc;
bp->b_flags = B_BUSY;
bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
if( bp->b_rcred != NOCRED)
crhold(bp->b_rcred);
if( bp->b_wcred != NOCRED)
crhold(bp->b_wcred);
bp->b_data = (caddr_t) kva;
bp->b_blkno = reqaddr[0];
bgetvp( swapdev_vp, bp);
bp->b_bcount = PAGE_SIZE*count;
bp->b_bufsize = PAGE_SIZE*count;
swapdev_vp->v_numoutput++;
/*
* If this is an async write we set up additional buffer fields
* and place a "cleaning" entry on the inuse queue.
*/
if ( flags & B_ASYNC ) {
spc->spc_flags = 0;
spc->spc_swp = swp;
for(i=0;i<count;i++)
spc->spc_m[i] = m[i];
spc->spc_count = count;
/*
* the completion routine for async writes
*/
bp->b_flags |= B_CALL;
bp->b_iodone = swap_pager_iodone;
bp->b_dirtyoff = 0;
bp->b_dirtyend = bp->b_bcount;
swp->sw_poip++;
TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
} else {
swp->sw_poip++;
bp->b_flags |= B_CALL;
bp->b_iodone = swap_pager_iodone1;
}
cnt.v_swapout++;
cnt.v_swappgsout += count;
/*
* perform the I/O
*/
VOP_STRATEGY(bp);
if ((flags & (B_READ|B_ASYNC)) == B_ASYNC ) {
if ((bp->b_flags & B_DONE) == B_DONE) {
swap_pager_clean();
}
splx(s);
for(i=0;i<count;i++) {
rtvals[i] = VM_PAGER_PEND;
}
return VM_PAGER_PEND;
}
/*
* wait for the sync I/O to complete
*/
while ((bp->b_flags & B_DONE) == 0) {
tsleep((caddr_t)bp, PVM, "swwrt", 0);
}
rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE);
--swp->sw_poip;
if (swp->sw_poip == 0)
wakeup((caddr_t) swp);
if (bp->b_vp)
brelvp(bp);
splx(s);
/*
* remove the mapping for kernel virtual
*/
pmap_qremove( kva, count);
/*
* if we have written the page, then indicate that the page
* is clean.
*/
if (rv == VM_PAGER_OK) {
for(i=0;i<count;i++) {
if( rtvals[i] == VM_PAGER_OK) {
m[i]->flags |= PG_CLEAN;
m[i]->flags &= ~PG_LAUNDRY;
pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
/*
* optimization, if a page has been read during the
* pageout process, we activate it.
*/
if ( (m[i]->flags & PG_ACTIVE) == 0 &&
pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))
vm_page_activate(m[i]);
}
}
} else {
for(i=0;i<count;i++) {
rtvals[i] = rv;
m[i]->flags |= PG_LAUNDRY;
}
}
if( bp->b_rcred != NOCRED)
crfree(bp->b_rcred);
if( bp->b_wcred != NOCRED)
crfree(bp->b_wcred);
TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
if (swap_pager_needflags & SWAP_FREE_NEEDED) {
swap_pager_needflags &= ~SWAP_FREE_NEEDED;
wakeup((caddr_t)&swap_pager_free);
}
return(rv);
}
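/*
* Caller sketch (illustrative only): a synchronous single-page
* pageout through this routine, as swap_pager_putpage() does above.
*
* int rtvals[1];
* vm_page_t marray[1];
*
* marray[0] = m;
* (void) swap_pager_output(swp, marray, 1, B_WRITE, rtvals);
*
* on success rtvals[0] is VM_PAGER_OK and the page is marked
* PG_CLEAN; with B_ASYNC set it would instead be VM_PAGER_PEND and
* completion would arrive later via swap_pager_iodone().
*/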
boolean_t
swap_pager_clean()
{
register swp_clean_t spc, tspc;
register int s;
tspc = NULL;
if (swap_pager_done.tqh_first == NULL)
return FALSE;
for (;;) {
s = splbio();
/*
* Look up and removal from done list must be done
* at splbio() to avoid conflicts with swap_pager_iodone.
*/
while ((spc = swap_pager_done.tqh_first) != 0) {
pmap_qremove( spc->spc_kva, spc->spc_count);
swap_pager_finish(spc);
TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
goto doclean;
}
/*
* No operations done, that's all we can do for now.
*/
splx(s);
break;
/*
* The desired page was found to be busy earlier in
* the scan but has since completed.
*/
doclean:
if (tspc && tspc == spc) {
tspc = NULL;
}
spc->spc_flags = 0;
TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
if (swap_pager_needflags & SWAP_FREE_NEEDED) {
swap_pager_needflags &= ~SWAP_FREE_NEEDED;
wakeup((caddr_t)&swap_pager_free);
}
++cleandone;
splx(s);
}
return(tspc ? TRUE : FALSE);
}
void
swap_pager_finish(spc)
register swp_clean_t spc;
{
vm_object_t object = spc->spc_m[0]->object;
int i;
if ((object->paging_in_progress -= spc->spc_count) == 0)
thread_wakeup((int) object);
/*
* If no error mark as clean and inform the pmap system.
* If error, mark as dirty so we will try again.
* (XXX could get stuck doing this, should give up after awhile)
*/
if (spc->spc_flags & SPC_ERROR) {
for(i=0;i<spc->spc_count;i++) {
printf("swap_pager_finish: clean of page %lx failed\n",
(u_long)VM_PAGE_TO_PHYS(spc->spc_m[i]));
spc->spc_m[i]->flags |= PG_LAUNDRY;
}
} else {
for(i=0;i<spc->spc_count;i++) {
pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
spc->spc_m[i]->flags |= PG_CLEAN;
}
}
for(i=0;i<spc->spc_count;i++) {
/*
* we wake up any processes that are waiting on
* these pages.
*/
PAGE_WAKEUP(spc->spc_m[i]);
}
nswiodone -= spc->spc_count;
return;
}
/*
* swap_pager_iodone
*/
void
swap_pager_iodone(bp)
register struct buf *bp;
{
register swp_clean_t spc;
int s;
s = splbio();
spc = (swp_clean_t) bp->b_spc;
TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
if (bp->b_flags & B_ERROR) {
spc->spc_flags |= SPC_ERROR;
printf("error %d blkno %lu sz %ld ",
bp->b_error, (u_long)bp->b_blkno, bp->b_bcount);
}
/*
if ((bp->b_flags & B_READ) == 0)
vwakeup(bp);
*/
bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_ASYNC);
if (bp->b_vp) {
brelvp(bp);
}
if( bp->b_rcred != NOCRED)
crfree(bp->b_rcred);
if( bp->b_wcred != NOCRED)
crfree(bp->b_wcred);
nswiodone += spc->spc_count;
if (--spc->spc_swp->sw_poip == 0) {
wakeup((caddr_t)spc->spc_swp);
}
if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
swap_pager_inuse.tqh_first == 0) {
swap_pager_needflags &= ~SWAP_FREE_NEEDED;
wakeup((caddr_t)&swap_pager_free);
wakeup((caddr_t)&vm_pages_needed);
}
if (vm_pageout_pages_needed) {
wakeup((caddr_t)&vm_pageout_pages_needed);
}
if ((swap_pager_inuse.tqh_first == NULL) ||
(cnt.v_free_count < cnt.v_free_min &&
nswiodone + cnt.v_free_count >= cnt.v_free_min) ) {
wakeup((caddr_t)&vm_pages_needed);
}
splx(s);
}
/*
* return true if any swap control structures can be allocated
*/
int
swap_pager_ready() {
if( swap_pager_free.tqh_first)
return 1;
else
return 0;
}