Changes from John Dyson and myself:

Fixed remaining known bugs in the buffer IO and VM system.

vfs_bio.c:
Fixed some race conditions and locking bugs. Improved performance
by removing some (now) unnecessary code and fixing some broken
logic.
Fixed process accounting of the number of FS outputs (see the sketch below).
Properly handle NFS interrupts (B_EINTR).
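
A sketch of the accounting fix, assuming the 4.4BSD shape of bdwrite()
(this is a reconstruction, not the committed vfs_bio.c): ru_oublock is
charged only when a buffer first becomes delayed-write, so redirtying
an already-dirty buffer no longer double counts an FS output.

void
bdwrite(bp)
        struct buf *bp;
{
        /*
         * Sketch only: charge the process for an FS output the first
         * time this buffer goes dirty; a second bdwrite() on the same
         * dirty buffer must not bump ru_oublock again.
         */
        if ((bp->b_flags & B_DELWRI) == 0) {
                bp->b_flags |= B_DELWRI;
                reassignbuf(bp, bp->b_vp);
                ++curproc->p_stats->p_ru.ru_oublock;
        }
        bp->b_flags |= B_DONE;
        brelse(bp);
}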

(various)
Replaced calls to clrbuf() with calls to an optimized routine
called vfs_bio_clrbuf().
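
A minimal sketch of why vfs_bio_clrbuf() is cheaper than clrbuf()
(the b_npages/b_pages fields and the VVMIO test are assumptions from
this era's buffer layout, not the committed code): for a buffer backed
by VM object pages, only the pieces whose backing pages are not already
valid need zeroing, whereas clrbuf() unconditionally bzero()s the whole
buffer.

void
vfs_bio_clrbuf(bp)
        struct buf *bp;
{
        int i;

        if ((bp->b_vp->v_flag & VVMIO) == 0) {
                clrbuf(bp);             /* non-VMIO: zero everything */
                return;
        }
        for (i = 0; i < bp->b_npages; i++) {
                vm_page_t m = bp->b_pages[i];

                if (m->valid == VM_PAGE_BITS_ALL)
                        continue;       /* backing page already valid */
                bzero(bp->b_data + i * PAGE_SIZE, PAGE_SIZE);
                m->valid = VM_PAGE_BITS_ALL;
        }
        bp->b_resid = 0;
}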

(various FS sync)
Sync out modified vnode_pager-backed pages.
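
The pattern is the one visible in the ffs_sync() hunk below: each
filesystem's sync loop now flushes the dirty pages of any writeable VM
object into the buffer cache before scanning the vnode's dirty buffers.

if (vp->v_vmdata &&
    (((vm_object_t) vp->v_vmdata)->flags & OBJ_WRITEABLE)) {
        if (vget(vp, 1))
                goto loop;      /* vnode went away; rescan the mount */
        _vm_object_page_clean((vm_object_t) vp->v_vmdata, 0, 0, 0);
        vput(vp);
}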

ffs_vnops.c:
Do two passes: Sync out file data first, then indirect blocks.
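
Condensed from the ffs_fsync() hunks below (simplified; error handling
and the MNT_WAIT cases are elided): indirect blocks live at negative
logical block numbers, so pass 0 skips any buffer with b_blkno < 0 and
pass 1 picks those up once the file data has been queued.

pass = 0;
loop:
s = splbio();
for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
        nbp = bp->b_vnbufs.le_next;
        /* pass 0 takes file data only; indirect blocks wait */
        if ((bp->b_flags & B_BUSY) ||
            (pass == 0 && bp->b_blkno < 0))
                continue;
        bremfree(bp);
        bp->b_flags |= B_BUSY;
        splx(s);
        (void) bawrite(bp);
        goto loop;
}
splx(s);
if (pass == 0) {
        pass = 1;               /* second pass: indirect blocks */
        goto loop;
}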

vm_fault.c:
Fixed a deadly embrace (deadlock) caused by acquiring locks in the wrong order.
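
The fix is visible in the vm_fault() hunks below: the pager's vnode
lock is now taken ahead of the VM object locks, and the release is
folded into the common unlock macro so every exit path drops locks in
the reverse of the order they were acquired. Condensed (the object/map
locking between the two fragments is elided):

/* On entry, before any object locking: */
vp = (struct vnode *) vnode_pager_lock(first_object);

/* At the tail of the shared unlock macro, dropped last: */
UNLOCK_MAP; \
if (vp != NULL) vnode_pager_unlock(vp); \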

vnode_pager.c:
Changed to use the buffer I/O system for writing out modified pages. This
should fix the problem with the modification date previously not getting
updated. It also dramatically simplifies the code. Note that this is
going to change in the future and be implemented via VOP_PUTPAGES().
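
The core of the new write path, lifted from the vnode_pager_output()
hunks at the bottom: a UIO_NOCOPY write through VOP_WRITE() sends the
dirty pages down the filesystem's ordinary write path, which is also
what keeps the modification time current.

/* Write the pages in place (no copy) through the FS write path. */
aiov.iov_base = (caddr_t) 0;
aiov.iov_len = maxsize;
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
auio.uio_offset = m[0]->offset;
auio.uio_segflg = UIO_NOCOPY;
auio.uio_rw = UIO_WRITE;
auio.uio_resid = maxsize;
auio.uio_procp = (struct proc *) 0;
error = VOP_WRITE(vp, &auio, IO_VMIO, curproc->p_ucred);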

vm_object.c:
Fixed a pile of bugs related to cleaning (vnode) objects. The performance
of vm_object_page_clean() is terrible when dealing with huge objects,
but this will change when we implement a binary tree to keep the object
pages sorted.
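
The interim strategy, from the _vm_object_page_clean() hunks below:
pass 0 probes page offsets directly when that is likely to be cheaper
than touching every resident page, otherwise the whole memq list is
walked; both are linear scans, hence the poor behavior on huge objects
until the pages are kept sorted.

pgcount = object->resident_page_count;
if (pass == 0 &&
    (pgcount < 128 || pgcount > (object->size / (8 * PAGE_SIZE)))) {
        /* few resident pages, or densely populated: probe each
         * offset in [tstart, tend) with vm_page_lookup() */
} else {
        /* sparse and large: walk the object's memq list */
}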

vm_pageout.c:
Fixed broken clustering of pageouts. Fixed race conditions and other
lockup-style bugs in the scanning of pages. Improved performance.
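
The clustering fix, from the vm_pageout_clean() hunks below: in
addition to the forward run ms[0..pageout_count-1], a backward run mb[]
is now gathered from the pages preceding the target, and the two are
spliced so one I/O covers a contiguous range around it.

if (b_pageout_count > 0) {
        /* shift the forward run up to make room ... */
        for (i = pageout_count - 1; i >= 0; --i)
                ms[i + b_pageout_count] = ms[i];
        /* ... and prepend the backward run, nearest page last */
        for (i = 0; i < b_pageout_count; i++)
                ms[i] = mb[b_pageout_count - (i + 1)];
        pageout_count += b_pageout_count;
}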
David Greenman 1995-04-09 06:03:56 +00:00
parent 213fd1b6e8
commit f6b04d2bfb
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=7695
19 changed files with 465 additions and 520 deletions

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)spec_vnops.c 8.6 (Berkeley) 4/9/94
* $Id: spec_vnops.c,v 1.9 1994/11/14 13:22:52 bde Exp $
* $Id: spec_vnops.c,v 1.10 1995/02/03 06:46:21 davidg Exp $
*/
#include <sys/param.h>
@ -101,7 +101,7 @@ struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
{ &vop_vfree_desc, spec_vfree }, /* vfree */
{ &vop_truncate_desc, spec_truncate }, /* truncate */
{ &vop_update_desc, spec_update }, /* update */
{ &vop_bwrite_desc, spec_bwrite }, /* bwrite */
{ &vop_bwrite_desc, vn_bwrite }, /* bwrite */
{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
};
struct vnodeopv_desc spec_vnodeop_opv_desc =

View File

@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)buf.h 8.7 (Berkeley) 1/21/94
* $Id: buf.h,v 1.15 1995/03/26 23:29:06 davidg Exp $
* $Id: buf.h,v 1.16 1995/03/28 07:57:33 bde Exp $
*/
#ifndef _SYS_BUF_H_
@ -132,7 +132,7 @@ struct buf {
#define B_RAW 0x00080000 /* Set by physio for raw transfers. */
#define B_READ 0x00100000 /* Read buffer. */
#define B_TAPE 0x00200000 /* Magnetic tape I/O. */
#define B_PDWANTED 0x00400000 /* Pageout daemon wants this buffer. */
#define B_RELBUF 0x00400000 /* Release VMIO buffer. */
#define B_WANTED 0x00800000 /* Process wants this buffer. */
#define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */
#define B_WRITEINPROG 0x01000000 /* Write in progress. */
@ -214,7 +214,6 @@ void bdwrite __P((struct buf *));
void bawrite __P((struct buf *));
void brelse __P((struct buf *));
void vfs_bio_awrite __P((struct buf *));
struct buf *getnewbuf __P((int slpflag, int slptimeo, int));
struct buf * getpbuf __P((void));
struct buf *incore __P((struct vnode *, daddr_t));
int inmem __P((struct vnode *, daddr_t));
@ -233,6 +232,7 @@ void cluster_write __P((struct buf *, u_quad_t));
int physio __P((void (*)(), struct buf *, dev_t, int, u_int (*)(),
struct uio *));
u_int minphys __P((struct buf *));
void vfs_bio_clrbuf __P((struct buf *));
void vfs_busy_pages __P((struct buf *, int clear_modify));
void vfs_unbusy_pages(struct buf *);
void vwakeup __P((struct buf *));

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)vnode.h 8.7 (Berkeley) 2/4/94
* $Id: vnode.h,v 1.17 1995/03/16 18:16:34 bde Exp $
* $Id: vnode.h,v 1.18 1995/03/29 05:09:44 davidg Exp $
*/
#ifndef _SYS_VNODE_H_
@ -158,6 +158,7 @@ struct vattr {
#define IO_SYNC 0x04 /* do I/O synchronously */
#define IO_NODELOCKED 0x08 /* underlying node already locked */
#define IO_NDELAY 0x10 /* FNDELAY flag set in file table */
#define IO_VMIO 0x20 /* data already in VMIO space */
/*
* Modes. Some values same as Ixxx entries from inode.h for now.

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)ffs_balloc.c 8.4 (Berkeley) 9/23/93
* $Id: ffs_balloc.c,v 1.5 1995/03/03 22:13:16 davidg Exp $
* $Id: ffs_balloc.c,v 1.6 1995/03/19 14:29:13 davidg Exp $
*/
#include <sys/param.h>
@ -150,7 +150,7 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
bp = getblk(vp, bn, nsize, 0, 0);
bp->b_blkno = fsbtodb(fs, newb);
if (flags & B_CLRBUF)
clrbuf(bp);
vfs_bio_clrbuf(bp);
}
ip->i_db[bn] = dbtofsb(fs, bp->b_blkno);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
@ -182,7 +182,7 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
nb = newb;
bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
bp->b_blkno = fsbtodb(fs, newb);
clrbuf(bp);
vfs_bio_clrbuf(bp);
/*
* Write synchronously so that indirect blocks
* never point at garbage.
@ -225,7 +225,7 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
nb = newb;
nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
nbp->b_blkno = fsbtodb(fs, nb);
clrbuf(nbp);
vfs_bio_clrbuf(nbp);
/*
* Write synchronously so that indirect blocks
* never point at garbage.
@ -262,7 +262,7 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
nbp->b_blkno = fsbtodb(fs, nb);
if (flags & B_CLRBUF)
clrbuf(nbp);
vfs_bio_clrbuf(nbp);
bap[indirs[i].in_off] = nb;
/*
* If required, write synchronously, otherwise use

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94
* $Id: ffs_vfsops.c,v 1.14 1995/03/18 18:03:29 davidg Exp $
* $Id: ffs_vfsops.c,v 1.15 1995/03/28 07:57:47 bde Exp $
*/
#include <sys/param.h>
@ -60,6 +60,10 @@
#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
int ffs_sbupdate __P((struct ufsmount *, int));
int ffs_reload __P((struct mount *,struct ucred *,struct proc *));
int ffs_oldfscompat __P((struct fs *));
@ -662,6 +666,15 @@ ffs_sync(mp, waitfor, cred, p)
if (VOP_ISLOCKED(vp))
continue;
ip = VTOI(vp);
if (vp->v_vmdata &&
(((vm_object_t) vp->v_vmdata)->flags & OBJ_WRITEABLE)) {
if (vget(vp, 1))
goto loop;
_vm_object_page_clean( (vm_object_t) vp->v_vmdata,
0, 0, 0);
vput(vp);
}
if (((vp->v_type == VCHR) || ((ip->i_flag &
(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0)) &&
vp->v_dirtyblkhd.lh_first == NULL)

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)ffs_vnops.c 8.7 (Berkeley) 2/3/94
* $Id: ffs_vnops.c,v 1.7 1994/10/10 01:04:40 phk Exp $
* $Id: ffs_vnops.c,v 1.8 1995/01/09 16:05:19 davidg Exp $
*/
#include <sys/param.h>
@ -50,6 +50,8 @@
#include <sys/lockf.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <miscfs/specfs/specdev.h>
#include <miscfs/fifofs/fifo.h>
@ -248,8 +250,18 @@ ffs_fsync(ap)
register struct buf *bp;
struct timeval tv;
struct buf *nbp;
int pass;
int s;
/*
* If the vnode has an object, then flush all of the dirty pages
* into the buffer cache.
*/
if (vp->v_vmdata)
_vm_object_page_clean((vm_object_t)vp->v_vmdata, 0, 0, 0);
pass = 0;
/*
* Flush all dirty buffers associated with a vnode.
*/
@ -257,20 +269,20 @@ ffs_fsync(ap)
s = splbio();
for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
nbp = bp->b_vnbufs.le_next;
if ((bp->b_flags & B_BUSY))
if ((bp->b_flags & B_BUSY) || (pass == 0 && (bp->b_blkno < 0)))
continue;
if ((bp->b_flags & B_DELWRI) == 0)
panic("ffs_fsync: not dirty");
if (bp->b_vp != vp && ap->a_waitfor != MNT_NOWAIT) {
if (bp->b_vp != vp || ap->a_waitfor != MNT_NOWAIT) {
bremfree(bp);
bp->b_flags |= B_BUSY;
splx(s);
/*
* Wait for I/O associated with indirect blocks to complete,
* since there is no way to quickly wait for them below.
*/
/*
* Wait for I/O associated with indirect blocks to complete,
* since there is no way to quickly wait for them below.
*/
if (bp->b_vp == vp || ap->a_waitfor == MNT_NOWAIT)
(void) bawrite(bp);
else
@ -281,12 +293,20 @@ ffs_fsync(ap)
}
goto loop;
}
splx(s);
if (pass == 0) {
pass = 1;
goto loop;
}
if (ap->a_waitfor == MNT_WAIT) {
s = splbio();
while (vp->v_numoutput) {
vp->v_flag |= VBWAIT;
(void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "ffsfsn", 0);
}
splx(s);
#ifdef DIAGNOSTIC
if (vp->v_dirtyblkhd.lh_first) {
vprint("ffs_fsync: dirty", vp);
@ -294,7 +314,6 @@ ffs_fsync(ap)
}
#endif
}
splx(s);
tv = time;
return (VOP_UPDATE(ap->a_vp, &tv, &tv, ap->a_waitfor == MNT_WAIT));

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)lfs_balloc.c 8.1 (Berkeley) 6/11/93
* $Id: lfs_balloc.c,v 1.4 1995/03/04 03:24:43 davidg Exp $
* $Id: lfs_balloc.c,v 1.5 1995/03/28 07:58:02 bde Exp $
*/
#include <sys/param.h>
#include <sys/systm.h>
@ -99,7 +99,7 @@ lfs_balloc(vp, iosize, lbn, bpp)
} else {
ip->i_blocks += bb;
ip->i_lfs->lfs_bfree -= bb;
clrbuf(ibp);
vfs_bio_clrbuf(ibp);
error = VOP_BWRITE(ibp);
}
} else
@ -123,7 +123,7 @@ lfs_balloc(vp, iosize, lbn, bpp)
ip->i_blocks += bb;
ip->i_lfs->lfs_bfree -= bb;
if (iosize != fs->lfs_bsize)
clrbuf(bp);
vfs_bio_clrbuf(bp);
}
else if (iosize == fs->lfs_bsize)
bp->b_blkno = daddr; /* Skip the I/O */

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)lfs_vnops.c 8.5 (Berkeley) 12/30/93
* $Id: lfs_vnops.c,v 1.6 1995/01/04 23:46:34 gibbs Exp $
* $Id: lfs_vnops.c,v 1.7 1995/03/28 07:58:06 bde Exp $
*/
#include <sys/param.h>
@ -50,6 +50,8 @@
#include <sys/malloc.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <miscfs/specfs/specdev.h>
#include <miscfs/fifofs/fifo.h>
@ -232,6 +234,13 @@ lfs_fsync(ap)
struct timeval tv;
int error;
tv = time;
/*
* If the vnode has an object, then flush all of the dirty pages
* into the buffer cache.
*/
if (ap->a_vp->v_vmdata)
_vm_object_page_clean((vm_object_t)ap->a_vp->v_vmdata, 0, 0, 0);
error = (VOP_UPDATE(ap->a_vp, &tv, &tv,
ap->a_waitfor == MNT_WAIT ? LFS_SYNC : 0));
if(ap->a_waitfor == MNT_WAIT && ap->a_vp->v_dirtyblkhd.lh_first != NULL)

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)ufs_readwrite.c 8.7 (Berkeley) 1/21/94
* $Id: ufs_readwrite.c,v 1.6 1995/01/09 16:05:26 davidg Exp $
* $Id: ufs_readwrite.c,v 1.7 1995/01/24 10:02:00 davidg Exp $
*/
#ifdef LFS_READWRITE
@ -263,9 +263,12 @@ WRITE(ap)
#ifdef LFS_READWRITE
(void)VOP_BWRITE(bp);
#else
if (ioflag & IO_SYNC)
if (ioflag & IO_VMIO)
bp->b_flags |= B_RELBUF;
if (ioflag & IO_SYNC) {
(void)bwrite(bp);
else if (xfersize + blkoffset == fs->fs_bsize) {
} else if (xfersize + blkoffset == fs->fs_bsize) {
if (doclusterwrite) {
bp->b_flags |= B_CLUSTEROK;
cluster_write(bp, ip->i_size);

View File

@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)ufs_vnops.c 8.10 (Berkeley) 4/1/94
* $Id: ufs_vnops.c,v 1.18 1995/03/19 09:47:32 davidg Exp $
* $Id: ufs_vnops.c,v 1.19 1995/03/19 13:44:03 davidg Exp $
*/
#include <sys/param.h>
@ -1643,7 +1643,7 @@ ufs_strategy(ap)
return (error);
}
if ((long)bp->b_blkno == -1)
clrbuf(bp);
vfs_bio_clrbuf(bp);
}
if ((long)bp->b_blkno == -1) {
biodone(bp);

View File

@ -66,7 +66,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
* $Id: vm_fault.c,v 1.20 1995/03/01 23:29:55 davidg Exp $
* $Id: vm_fault.c,v 1.21 1995/03/27 02:41:00 davidg Exp $
*/
/*
@ -94,6 +94,10 @@ int vm_fault_additional_pages __P((vm_object_t, vm_offset_t, vm_page_t, int, int
extern int swap_pager_full;
extern int vm_pageout_proc_limit;
struct vnode *vnode_pager_lock __P((vm_object_t object));
void vnode_pager_unlock __P((struct vnode *));
/*
* vm_fault:
*
@ -137,6 +141,7 @@ vm_fault(map, vaddr, fault_type, change_wiring)
vm_page_t marray[VM_FAULT_READ];
int spl;
int hardfault = 0;
struct vnode *vp = NULL;
cnt.v_vm_faults++; /* needs lock XXX */
/*
@ -173,6 +178,7 @@ vm_fault(map, vaddr, fault_type, change_wiring)
vm_object_unlock(first_object); \
} \
UNLOCK_MAP; \
if (vp != NULL) vnode_pager_unlock(vp); \
}
#define UNLOCK_AND_DEALLOCATE { \
@ -192,6 +198,9 @@ RetryFault:;
&first_offset, &prot, &wired, &su)) != KERN_SUCCESS) {
return (result);
}
vp = (struct vnode *) vnode_pager_lock(first_object);
lookup_still_valid = TRUE;
if (wired)
@ -271,6 +280,7 @@ RetryFault:;
vm_object_deallocate(first_object);
goto RetryFault;
}
if ((m->flags & PG_CACHE) &&
(cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_reserved) {
UNLOCK_AND_DEALLOCATE;
@ -364,8 +374,13 @@ RetryFault:;
* if moved.
*/
m = vm_page_lookup(object, offset);
m->valid = VM_PAGE_BITS_ALL;
if( !m) {
UNLOCK_AND_DEALLOCATE;
goto RetryFault;
}
pmap_clear_modify(VM_PAGE_TO_PHYS(m));
m->valid = VM_PAGE_BITS_ALL;
hardfault++;
break;
}
@ -808,6 +823,7 @@ RetryFault:;
if (prot & VM_PROT_WRITE) {
m->flags |= PG_WRITEABLE;
m->object->flags |= OBJ_WRITEABLE;
/*
* If the fault is a write, we know that this page is being
* written NOW. This will save on the pmap_is_modified() calls
@ -817,11 +833,12 @@ RetryFault:;
m->dirty = VM_PAGE_BITS_ALL;
}
}
m->flags |= PG_MAPPED;
pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired);
#if 0
if( ((prot & VM_PROT_WRITE) == 0) && change_wiring == 0 && wired == 0)
if (change_wiring == 0 && wired == 0)
pmap_prefault(map->pmap, vaddr, entry, first_object);
#endif

View File

@ -61,7 +61,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
* $Id: vm_object.c,v 1.39 1995/03/25 06:09:33 davidg Exp $
* $Id: vm_object.c,v 1.40 1995/03/25 08:42:14 davidg Exp $
*/
/*
@ -86,6 +86,7 @@
static void _vm_object_allocate(vm_size_t, vm_object_t);
/*
* Virtual memory objects maintain the actual data
* associated with allocated virtual memory. A given
@ -370,7 +371,6 @@ vm_object_terminate(object)
register vm_page_t p, next;
vm_object_t shadow_object;
int s;
struct vnode *vp = NULL;
/*
* Detach the object from its shadow if we are the shadow's copy.
@ -379,23 +379,12 @@ vm_object_terminate(object)
vm_object_lock(shadow_object);
if (shadow_object->copy == object)
shadow_object->copy = NULL;
#if 0
else if (shadow_object->copy != NULL)
panic("vm_object_terminate: copy/shadow inconsistency");
#endif
vm_object_unlock(shadow_object);
}
if (object->pager && (object->pager->pg_type == PG_VNODE)) {
vn_pager_t vnp = object->pager->pg_data;
vp = vnp->vnp_vp;
VOP_LOCK(vp);
vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
}
/*
* Wait until the pageout daemon is through with the object.
* wait for the pageout daemon to be done with the object
*/
s = splhigh();
while (object->paging_in_progress) {
vm_object_unlock(object);
@ -405,31 +394,6 @@ vm_object_terminate(object)
}
splx(s);
/*
* While the paging system is locked, pull the object's pages off the
* active and inactive queues. This keeps the pageout daemon from
* playing with them during vm_pager_deallocate.
*
* We can't free the pages yet, because the object's pager may have to
* write them out before deallocating the paging space.
*/
for (p = object->memq.tqh_first; p; p = next) {
VM_PAGE_CHECK(p);
next = p->listq.tqe_next;
vm_page_lock_queues();
if (p->flags & PG_CACHE)
vm_page_free(p);
else {
s = splhigh();
vm_page_unqueue(p);
splx(s);
}
vm_page_unlock_queues();
p = next;
}
if (object->paging_in_progress != 0)
panic("vm_object_deallocate: pageout in progress");
@ -437,11 +401,14 @@ vm_object_terminate(object)
* Clean and free the pages, as appropriate. All references to the
* object are gone, so we don't need to lock it.
*/
if (vp != NULL) {
VOP_UNLOCK(vp);
vm_object_page_clean(object, 0, 0, TRUE);
if (object->pager && (object->pager->pg_type == PG_VNODE)) {
vn_pager_t vnp = object->pager->pg_data;
struct vnode *vp;
vp = vnp->vnp_vp;
VOP_LOCK(vp);
vinvalbuf(vp, 0, NOCRED, NULL, 0, 0);
(void) _vm_object_page_clean(object, 0, 0, TRUE);
vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
VOP_UNLOCK(vp);
}
@ -490,62 +457,180 @@ vm_object_terminate(object)
*
* The object must be locked.
*/
void
vm_object_page_clean(object, start, end, syncio)
register vm_object_t object;
register vm_offset_t start;
register vm_offset_t end;
_vm_object_page_clean(object, start, end, syncio)
vm_object_t object;
vm_offset_t start;
vm_offset_t end;
boolean_t syncio;
{
register vm_page_t p, nextp;
int size;
int s;
register vm_page_t p;
register vm_offset_t tstart, tend;
int pass;
int pgcount, s;
int allclean;
if (object->pager == NULL)
if (object->pager == NULL || (object->flags & OBJ_WRITEABLE) == 0)
return;
if (start != end) {
start = trunc_page(start);
end = round_page(end);
}
size = end - start;
again:
/*
* Wait until the pageout daemon is through with the object.
*/
s = splhigh();
while (object->paging_in_progress) {
object->flags |= OBJ_PIPWNT;
tsleep(object, PVM, "objpcw", 0);
object->flags &= ~OBJ_WRITEABLE;
pass = 0;
startover:
tstart = start;
if (end == 0) {
tend = object->size;
} else {
tend = end;
}
/*
* Wait until potential collapse operation is complete
*/
if (object->flags & OBJ_INTERNAL) {
s = splhigh();
while (object->paging_in_progress) {
object->flags |= OBJ_PIPWNT;
tsleep(object, PVM, "objpcw", 0);
}
splx(s);
}
splx(s);
nextp = object->memq.tqh_first;
while ((p = nextp) && ((start == end) || (size != 0))) {
nextp = p->listq.tqe_next;
if (start == end || (p->offset >= start && p->offset < end)) {
if ((p->flags & PG_BUSY) || p->busy) {
s = splhigh();
pgcount = object->resident_page_count;
p->flags |= PG_WANTED;
tsleep(p, PVM, "objpcn", 0);
splx(s);
goto again;
if (pass == 0 &&
(pgcount < 128 || pgcount > (object->size / (8 * PAGE_SIZE)))) {
allclean = 1;
for(; pgcount && (tstart < tend); tstart += PAGE_SIZE) {
p = vm_page_lookup(object, tstart);
if (!p)
continue;
--pgcount;
s = splhigh();
TAILQ_REMOVE(&object->memq, p, listq);
TAILQ_INSERT_TAIL(&object->memq, p, listq);
splx(s);
if ((p->flags & (PG_BUSY|PG_CACHE)) || p->busy || p->valid == 0 ||
p->bmapped) {
continue;
}
size -= PAGE_SIZE;
vm_page_test_dirty(p);
if ((p->dirty & p->valid) != 0) {
vm_pageout_clean(p, VM_PAGEOUT_FORCE);
goto again;
if ((p->valid & p->dirty) != 0) {
vm_offset_t tincr;
tincr = vm_pageout_clean(p, VM_PAGEOUT_FORCE);
pgcount -= (tincr - 1);
tincr *= PAGE_SIZE;
tstart += tincr - PAGE_SIZE;
allclean = 0;
}
}
if (!allclean) {
pass = 1;
goto startover;
}
object->flags &= ~OBJ_WRITEABLE;
return;
}
allclean = 1;
while ((p = object->memq.tqh_first) != NULL && pgcount > 0) {
if (p->flags & PG_CACHE) {
goto donext;
}
if (p->offset >= tstart && p->offset < tend) {
if (p->valid == 0 || p->bmapped) {
goto donext;
}
s = splhigh();
if ((p->flags & PG_BUSY) || p->busy) {
allclean = 0;
if (pass > 0) {
p->flags |= PG_WANTED;
tsleep(p, PVM, "objpcn", 0);
splx(s);
continue;
} else {
splx(s);
goto donext;
}
}
TAILQ_REMOVE(&object->memq, p, listq);
TAILQ_INSERT_TAIL(&object->memq, p, listq);
splx(s);
pgcount--;
vm_page_test_dirty(p);
if ((p->valid & p->dirty) != 0) {
vm_pageout_clean(p, VM_PAGEOUT_FORCE);
allclean = 0;
}
continue;
}
donext:
TAILQ_REMOVE(&object->memq, p, listq);
TAILQ_INSERT_TAIL(&object->memq, p, listq);
pgcount--;
}
if ((!allclean && (pass == 0)) || (object->flags & OBJ_WRITEABLE)) {
pass = 1;
object->flags &= ~OBJ_WRITEABLE;
goto startover;
}
return;
}
void
vm_object_page_clean(object, start, end, syncio)
register vm_object_t object;
register vm_offset_t start;
register vm_offset_t end;
boolean_t syncio;
{
if (object->pager && (object->flags & OBJ_WRITEABLE) &&
(object->pager->pg_type == PG_VNODE)) {
vn_pager_t vnp = (vn_pager_t) object->pager->pg_data;
struct vnode *vp;
vp = vnp->vnp_vp;
vget(vp, 1);
_vm_object_page_clean(object, start, end, syncio);
vput(vp);
} else {
_vm_object_page_clean(object, start, end, syncio);
}
}
void
vm_object_cache_clean()
{
vm_object_t object;
vm_object_cache_lock();
while(1) {
object = vm_object_cached_list.tqh_first;
while( object) {
if( (object->flags & OBJ_WRITEABLE) &&
object->pager &&
object->pager->pg_type == PG_VNODE) {
vm_object_page_clean(object, 0, 0, 0);
goto loop;
}
object = object->cached_list.tqe_next;
}
return;
loop:
}
}
/*
* vm_object_deactivate_pages
*
@ -1045,7 +1130,7 @@ vm_object_qcollapse(object)
next = p->listq.tqe_next;
if ((p->flags & (PG_BUSY | PG_FICTITIOUS | PG_CACHE)) ||
!p->valid || p->hold_count || p->wire_count || p->busy || p->bmapped) {
!p->valid || p->hold_count || p->wire_count || p->busy) {
p = next;
continue;
}

View File

@ -61,7 +61,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
* $Id: vm_object.h,v 1.15 1995/03/21 01:11:43 davidg Exp $
* $Id: vm_object.h,v 1.16 1995/03/22 12:24:11 davidg Exp $
*/
/*
@ -113,6 +113,7 @@ struct vm_object {
#define OBJ_ILOCKED 0x0010 /* lock from modification */
#define OBJ_ILOCKWT 0x0020 /* wait for lock from modification */
#define OBJ_PIPWNT 0x0040 /* paging in progress wanted */
#define OBJ_WRITEABLE 0x0080 /* object has been made writeable */
TAILQ_HEAD(vm_object_hash_head, vm_object_hash_entry);
@ -174,6 +175,7 @@ void vm_object_deallocate __P((vm_object_t));
void vm_object_enter __P((vm_object_t, vm_pager_t));
void vm_object_init __P((vm_size_t));
vm_object_t vm_object_lookup __P((vm_pager_t));
void _vm_object_page_clean __P((vm_object_t, vm_offset_t, vm_offset_t, boolean_t));
void vm_object_page_clean __P((vm_object_t, vm_offset_t, vm_offset_t, boolean_t));
void vm_object_page_remove __P((vm_object_t, vm_offset_t, vm_offset_t, boolean_t));
void vm_object_pmap_copy __P((vm_object_t, vm_offset_t, vm_offset_t));

View File

@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91
* $Id: vm_page.c,v 1.26 1995/03/19 14:29:26 davidg Exp $
* $Id: vm_page.c,v 1.27 1995/03/25 08:47:35 davidg Exp $
*/
/*
@ -336,7 +336,7 @@ vm_page_startup(starta, enda, vaddr)
++cnt.v_free_count;
m = PHYS_TO_VM_PAGE(pa);
m->flags = PG_FREE;
vm_page_set_clean(m, 0, PAGE_SIZE);
m->dirty = 0;
m->object = 0;
m->phys_addr = pa;
m->hold_count = 0;
@ -912,8 +912,6 @@ vm_page_activate(m)
if (m->wire_count == 0) {
TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
m->flags |= PG_ACTIVE;
TAILQ_REMOVE(&m->object->memq, m, listq);
TAILQ_INSERT_TAIL(&m->object->memq, m, listq);
if (m->act_count < 5)
m->act_count = 5;
else if( m->act_count < ACT_MAX)
@ -1088,9 +1086,9 @@ vm_page_is_valid(m, base, size)
int base;
int size;
{
int bits;
int bits = vm_page_bits(base, size);
if (m->valid && ((m->valid & (bits = vm_page_bits(base, size))) == bits))
if (m->valid && ((m->valid & bits) == bits))
return 1;
else
return 0;
@ -1124,8 +1122,7 @@ vm_page_test_dirty(m)
vm_page_t m;
{
if ((m->dirty != VM_PAGE_BITS_ALL) &&
pmap_is_modified(VM_PAGE_TO_PHYS(m))) {
pmap_clear_modify(VM_PAGE_TO_PHYS(m));
pmap_is_modified(VM_PAGE_TO_PHYS(m))) {
m->dirty = VM_PAGE_BITS_ALL;
}
}
@ -1140,6 +1137,8 @@ vm_page_set_clean(m, base, size)
int size;
{
m->dirty &= ~vm_page_bits(base, size);
if( base == 0 && size == PAGE_SIZE)
pmap_clear_modify(VM_PAGE_TO_PHYS(m));
}
/*
@ -1175,3 +1174,4 @@ print_page_info()
printf("cnt.v_cache_min: %d\n", cnt.v_cache_min);
printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target);
}

View File

@ -65,7 +65,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
* $Id: vm_pageout.c,v 1.43 1995/03/28 05:35:29 davidg Exp $
* $Id: vm_pageout.c,v 1.44 1995/03/28 05:58:35 davidg Exp $
*/
/*
@ -80,11 +80,13 @@
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>
#include <vm/vnode_pager.h>
extern vm_map_t kmem_map;
int vm_pages_needed; /* Event on which pageout daemon sleeps */
@ -101,6 +103,7 @@ extern int nswiodone;
extern int swap_pager_full;
extern int vm_swap_size;
extern int swap_pager_ready();
extern int vfs_update_wakeup;
#define MAXSCAN 1024 /* maximum number of pages to scan in queues */
@ -136,8 +139,8 @@ vm_pageout_clean(m, sync)
register vm_object_t object;
register vm_pager_t pager;
int pageout_status[VM_PAGEOUT_PAGE_COUNT];
vm_page_t ms[VM_PAGEOUT_PAGE_COUNT];
int pageout_count;
vm_page_t ms[VM_PAGEOUT_PAGE_COUNT], mb[VM_PAGEOUT_PAGE_COUNT];
int pageout_count, b_pageout_count;
int anyok = 0;
int i;
vm_offset_t offset = m->offset;
@ -162,7 +165,7 @@ vm_pageout_clean(m, sync)
(cnt.v_free_count + cnt.v_cache_count) < cnt.v_pageout_free_min)
return 0;
if ((!sync && m->bmapped != 0 && m->hold_count != 0) ||
if ((!sync && m->hold_count != 0) ||
((m->busy != 0) || (m->flags & PG_BUSY)))
return 0;
@ -179,7 +182,8 @@ vm_pageout_clean(m, sync)
ms[i] = mt = vm_page_lookup(object, offset + i * NBPG);
if (mt) {
vm_page_test_dirty(mt);
if (mt->flags & (PG_BUSY|PG_CACHE) || mt->busy)
break;
/*
* we can cluster ONLY if: ->> the page is NOT
* busy, and is NOT clean the page is not
@ -188,19 +192,54 @@ vm_pageout_clean(m, sync)
* inactive, or a seldom used active page. 2)
* or we force the issue.
*/
vm_page_test_dirty(mt);
if ((mt->dirty & mt->valid) != 0
&& (((mt->flags & (PG_BUSY | PG_INACTIVE)) == PG_INACTIVE)
|| sync == VM_PAGEOUT_FORCE)
&& ((mt->flags & PG_INACTIVE) ||
(sync == VM_PAGEOUT_FORCE))
&& (mt->wire_count == 0)
&& (mt->busy == 0)
&& (mt->hold_count == 0)
&& (mt->bmapped == 0))
&& (mt->hold_count == 0))
pageout_count++;
else
break;
} else
break;
}
if ((pageout_count < vm_pageout_page_count) && (offset != 0)) {
b_pageout_count = 0;
for (i = 0; i < vm_pageout_page_count-pageout_count; i++) {
vm_page_t mt;
mt = vm_page_lookup(object, offset - (i + 1) * NBPG);
if (mt) {
if (mt->flags & (PG_BUSY|PG_CACHE) || mt->busy)
break;
vm_page_test_dirty(mt);
if ((mt->dirty & mt->valid) != 0
&& ((mt->flags & PG_INACTIVE) ||
(sync == VM_PAGEOUT_FORCE))
&& (mt->wire_count == 0)
&& (mt->hold_count == 0)) {
mb[b_pageout_count] = mt;
b_pageout_count++;
if ((offset - (i + 1) * NBPG) == 0)
break;
} else
break;
} else
break;
}
if (b_pageout_count > 0) {
for(i=pageout_count - 1;i>=0;--i) {
ms[i+b_pageout_count] = ms[i];
}
for(i=0;i<b_pageout_count;i++) {
ms[i] = mb[b_pageout_count - (i + 1)];
}
pageout_count += b_pageout_count;
}
}
/*
* we allow reads during pageouts...
*/
@ -353,7 +392,6 @@ vm_pageout_object_deactivate_pages(map, object, count, map_remove_only)
vm_page_lock_queues();
if (p->wire_count != 0 ||
p->hold_count != 0 ||
p->bmapped != 0 ||
p->busy != 0 ||
!pmap_page_exists(vm_map_pmap(map), VM_PAGE_TO_PHYS(p))) {
p = next;
@ -385,8 +423,6 @@ vm_pageout_object_deactivate_pages(map, object, count, map_remove_only)
} else {
TAILQ_REMOVE(&vm_page_queue_active, p, pageq);
TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq);
TAILQ_REMOVE(&object->memq, p, listq);
TAILQ_INSERT_TAIL(&object->memq, p, listq);
}
/*
* see if we are done yet
@ -412,8 +448,6 @@ vm_pageout_object_deactivate_pages(map, object, count, map_remove_only)
TAILQ_REMOVE(&vm_page_queue_active, p, pageq);
TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq);
TAILQ_REMOVE(&object->memq, p, listq);
TAILQ_INSERT_TAIL(&object->memq, p, listq);
}
} else if ((p->flags & (PG_INACTIVE | PG_BUSY)) == PG_INACTIVE) {
vm_page_protect(p, VM_PROT_NONE);
@ -488,13 +522,14 @@ int
vm_pageout_scan()
{
vm_page_t m;
int page_shortage, maxscan, maxlaunder;
int page_shortage, maxscan, maxlaunder, pcount;
int pages_freed;
vm_page_t next;
struct proc *p, *bigproc;
vm_offset_t size, bigsize;
vm_object_t object;
int force_wakeup = 0;
int vnodes_skipped = 0;
/* calculate the total cached size */
@ -523,10 +558,10 @@ vm_pageout_scan()
MAXLAUNDER : cnt.v_inactive_target;
rescan1:
maxscan = min(cnt.v_inactive_count, MAXSCAN);
maxscan = cnt.v_inactive_count;
m = vm_page_queue_inactive.tqh_first;
while (m && (maxscan-- > 0) &&
(cnt.v_cache_count < (cnt.v_cache_min + cnt.v_free_target))) {
while ((m != NULL) && (maxscan-- > 0) &&
((cnt.v_cache_count + cnt.v_free_count) < (cnt.v_cache_min + cnt.v_free_target))) {
vm_page_t next;
cnt.v_pdpages++;
@ -542,8 +577,7 @@ vm_pageout_scan()
/*
* dont mess with busy pages
*/
if (m->hold_count || m->busy || (m->flags & PG_BUSY) ||
m->bmapped != 0) {
if (m->hold_count || m->busy || (m->flags & PG_BUSY)) {
TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
m = next;
@ -566,24 +600,43 @@ vm_pageout_scan()
m = next;
continue;
}
vm_page_test_dirty(m);
if ((m->dirty & m->valid) == 0) {
if (m->valid == 0) {
pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
vm_page_free(m);
cnt.v_dfree++;
vm_page_test_dirty(m);
if (m->dirty == 0) {
if (m->bmapped == 0) {
if (m->valid == 0) {
pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
vm_page_free(m);
cnt.v_dfree++;
} else {
vm_page_cache(m);
}
++pages_freed;
} else {
vm_page_cache(m);
m = next;
continue;
}
} else if (maxlaunder > 0) {
int written;
struct vnode *vp = NULL;
object = m->object;
if ((object->flags & OBJ_DEAD) || !vm_object_lock_try(object)) {
m = next;
continue;
}
if (object->pager && object->pager->pg_type == PG_VNODE) {
vp = ((vn_pager_t) object->pager->pg_data)->vnp_vp;
if (VOP_ISLOCKED(vp) || vget(vp, 1)) {
vm_object_unlock(object);
if (object->flags & OBJ_WRITEABLE)
++vnodes_skipped;
m = next;
continue;
}
}
/*
* If a page is dirty, then it is either being washed
* (but not yet cleaned) or it is still in the
@ -591,6 +644,10 @@ vm_pageout_scan()
* start the cleaning operation.
*/
written = vm_pageout_clean(m, 0);
if (vp)
vput(vp);
vm_object_unlock(object);
if (!next) {
@ -619,11 +676,14 @@ vm_pageout_scan()
if (page_shortage <= 0) {
if (pages_freed == 0) {
page_shortage = cnt.v_free_min - cnt.v_free_count;
} else {
page_shortage = 1;
}
}
maxscan = min(cnt.v_active_count, MAXSCAN);
maxscan = MAXSCAN;
pcount = cnt.v_active_count;
m = vm_page_queue_active.tqh_first;
while (m && (maxscan-- > 0) && (page_shortage > 0)) {
while ((m != NULL) && (maxscan > 0) && (pcount-- > 0) && (page_shortage > 0)) {
cnt.v_pdpages++;
next = m->pageq.tqe_next;
@ -633,8 +693,7 @@ vm_pageout_scan()
*/
if ((m->busy != 0) ||
(m->flags & PG_BUSY) ||
(m->hold_count != 0) ||
(m->bmapped != 0)) {
(m->hold_count != 0)) {
TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
m = next;
@ -651,10 +710,6 @@ vm_pageout_scan()
}
TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
s = splhigh();
TAILQ_REMOVE(&m->object->memq, m, listq);
TAILQ_INSERT_TAIL(&m->object->memq, m, listq);
splx(s);
} else {
m->flags &= ~PG_REFERENCED;
pmap_clear_reference(VM_PAGE_TO_PHYS(m));
@ -665,9 +720,9 @@ vm_pageout_scan()
*/
if (!m->act_count && (page_shortage > 0)) {
if (m->object->ref_count == 0) {
vm_page_test_dirty(m);
--page_shortage;
if ((m->dirty & m->valid) == 0) {
vm_page_test_dirty(m);
if ((m->bmapped == 0) && (m->dirty == 0) ) {
m->act_count = 0;
vm_page_cache(m);
} else {
@ -682,6 +737,7 @@ vm_pageout_scan()
TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
}
}
maxscan--;
m = next;
}
@ -697,6 +753,18 @@ vm_pageout_scan()
cnt.v_dfree++;
}
/*
* If we didn't get enough free pages, and we have skipped a vnode
* in a writeable object, wakeup the sync daemon.
*/
if (vnodes_skipped &&
(cnt.v_cache_count + cnt.v_free_count) < cnt.v_free_min) {
if (!vfs_update_wakeup) {
vfs_update_wakeup = 1;
wakeup((caddr_t) &vfs_update_wakeup);
}
}
/*
* make sure that we have swap space -- if we are low on memory and
* swap -- then kill the biggest process.
@ -758,6 +826,8 @@ vm_pageout()
* Initialize some paging parameters.
*/
cnt.v_interrupt_free_min = 2;
if (cnt.v_page_count > 1024)
cnt.v_free_min = 4 + (cnt.v_page_count - 1024) / 200;
else
@ -766,7 +836,8 @@ vm_pageout()
* free_reserved needs to include enough for the largest swap pager
* structures plus enough for any pv_entry structs when paging.
*/
cnt.v_pageout_free_min = 6 + cnt.v_page_count / 1024;
cnt.v_pageout_free_min = 6 + cnt.v_page_count / 1024 +
cnt.v_interrupt_free_min;
cnt.v_free_reserved = cnt.v_pageout_free_min + 2;
cnt.v_free_target = 3 * cnt.v_free_min + cnt.v_free_reserved;
cnt.v_free_min += cnt.v_free_reserved;
@ -785,8 +856,6 @@ vm_pageout()
if (vm_page_max_wired == 0)
vm_page_max_wired = cnt.v_free_count / 3;
cnt.v_interrupt_free_min = 2;
(void) swap_pager_alloc(0, 0, 0, 0);
/*

View File

@ -61,7 +61,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
* $Id: vm_pageout.h,v 1.9 1995/03/01 23:30:03 davidg Exp $
* $Id: vm_pageout.h,v 1.10 1995/03/16 18:17:30 bde Exp $
*/
#ifndef _VM_VM_PAGEOUT_H_
@ -112,7 +112,6 @@ vm_wait()
if (curproc == pageproc) {
vm_pageout_pages_needed = 1;
tsleep((caddr_t) &vm_pageout_pages_needed, PSWP, "vmwait", 0);
vm_pageout_pages_needed = 0;
} else {
if (!vm_pages_needed) {
vm_pages_needed++;

View File

@ -37,7 +37,7 @@
* SUCH DAMAGE.
*
* from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91
* $Id: vnode_pager.c,v 1.35 1995/03/21 23:07:19 davidg Exp $
* $Id: vnode_pager.c,v 1.36 1995/03/21 23:09:53 davidg Exp $
*/
/*
@ -49,21 +49,13 @@
*/
/*
* MODIFICATIONS:
* John S. Dyson 08 Dec 93
*
* This file in conjunction with some vm_fault mods, eliminate the performance
* advantage for using the buffer cache and minimize memory copies.
*
* 1) Supports multiple - block reads
* 1) Supports multiple - block reads/writes
* 2) Bypasses buffer cache for reads
*
* TODO:
* Implement getpage/putpage interface for filesystems. Should
* greatly re-simplify the vnode_pager.
*
* 1) Totally bypass buffer cache for reads
* (Currently will still sometimes use buffer cache for reads)
* 2) Bypass buffer cache for writes
* (Code does not support it, but mods are simple)
*/
#include <sys/param.h>
@ -149,7 +141,8 @@ vnode_pager_alloc(handle, size, prot, offset)
* with vm_pager_lookup.
*/
vp = (struct vnode *) handle;
while ((object = (vm_object_t) vp->v_vmdata) && (object->flags & OBJ_DEAD))
while ((object = (vm_object_t) vp->v_vmdata) &&
(object->flags & OBJ_DEAD))
tsleep((caddr_t) object, PVM, "vadead", 0);
pager = NULL;
@ -177,7 +170,6 @@ vnode_pager_alloc(handle, size, prot, offset)
vm_object_enter(object, pager);
object->pager = pager;
} else {
printf("Error in getattr: %d\n", rtval);
free((caddr_t) vnp, M_VMPGDATA);
free((caddr_t) pager, M_VMPAGER);
return (NULL);
@ -205,6 +197,8 @@ vnode_pager_alloc(handle, size, prot, offset)
*/
(void) vm_object_lookup(pager);
}
if( vp->v_type == VREG)
vp->v_flag |= VVMIO;
return (pager);
}
@ -317,7 +311,6 @@ vnode_pager_haspage(pager, offset)
if (incore(vp, block))
return TRUE;
VOP_LOCK(vp);
/*
* Read the index to find the disk block to read from. If there is no
* block, report that we don't have this data.
@ -325,7 +318,6 @@ vnode_pager_haspage(pager, offset)
* Assumes that the vnode has whole page or nothing.
*/
err = VOP_BMAP(vp, block, (struct vnode **) 0, &bn, 0);
VOP_UNLOCK(vp);
if (err)
return (TRUE);
return ((long) bn < 0 ? FALSE : TRUE);
@ -529,40 +521,6 @@ vnode_pager_iodone(bp)
{
bp->b_flags |= B_DONE;
wakeup((caddr_t) bp);
if (bp->b_flags & B_ASYNC) {
vm_offset_t paddr;
vm_page_t m;
vm_object_t obj = 0;
int i;
int npages;
paddr = (vm_offset_t) bp->b_data;
if (bp->b_bufsize != bp->b_bcount)
bzero(bp->b_data + bp->b_bcount,
bp->b_bufsize - bp->b_bcount);
npages = (bp->b_bufsize + PAGE_SIZE - 1) / PAGE_SIZE;
for (i = 0; i < npages; i++) {
m = PHYS_TO_VM_PAGE(pmap_kextract(paddr + i * PAGE_SIZE));
obj = m->object;
if (m) {
m->dirty = 0;
m->valid = VM_PAGE_BITS_ALL;
if (m->flags & PG_WANTED)
m->flags |= PG_REFERENCED;
PAGE_WAKEUP(m);
} else {
panic("vnode_pager_iodone: page is gone!!!");
}
}
pmap_qremove(paddr, npages);
if (obj) {
vm_object_pip_wakeup(obj);
} else {
panic("vnode_pager_iodone: object is gone???");
}
relpbuf(bp);
}
}
/*
@ -587,8 +545,6 @@ vnode_pager_input_smlfs(vnp, m)
bsize = vp->v_mount->mnt_stat.f_iosize;
VOP_LOCK(vp);
VOP_BMAP(vp, 0, &dp, 0, 0);
kva = vm_pager_map_page(m);
@ -637,16 +593,15 @@ vnode_pager_input_smlfs(vnp, m)
if (error)
break;
vm_page_set_clean(m, i * bsize, bsize);
vm_page_set_valid(m, i * bsize, bsize);
vm_page_set_clean(m, (i * bsize) & (PAGE_SIZE-1), bsize);
vm_page_set_valid(m, (i * bsize) & (PAGE_SIZE-1), bsize);
} else {
vm_page_set_clean(m, i * bsize, bsize);
vm_page_set_clean(m, (i * bsize) & (PAGE_SIZE-1), bsize);
bzero((caddr_t) kva + i * bsize, bsize);
}
nextblock:
}
vm_pager_unmap_page(kva);
VOP_UNLOCK(vp);
pmap_clear_modify(VM_PAGE_TO_PHYS(m));
if (error) {
return VM_PAGER_ERROR;
@ -682,7 +637,6 @@ vnode_pager_input_old(vnp, m)
if (m->offset + size > vnp->vnp_size)
size = vnp->vnp_size - m->offset;
VOP_LOCK(vnp->vnp_vp);
/*
* Allocate a kernel virtual address and initialize so that
* we can use VOP_READ/WRITE routines.
@ -709,7 +663,6 @@ vnode_pager_input_old(vnp, m)
bzero((caddr_t) kva + count, PAGE_SIZE - count);
}
vm_pager_unmap_page(kva);
VOP_UNLOCK(vnp->vnp_vp);
}
pmap_clear_modify(VM_PAGE_TO_PHYS(m));
m->dirty = 0;
@ -805,7 +758,6 @@ vnode_pager_input(vnp, m, count, reqpage)
* here on direct device I/O
*/
VOP_LOCK(vp);
firstaddr = -1;
/*
* calculate the run that includes the required page
@ -919,7 +871,6 @@ vnode_pager_input(vnp, m, count, reqpage)
* free the buffer header back to the swap buffer pool
*/
relpbuf(bp);
VOP_UNLOCK(vp);
finishup:
for (i = 0; i < count; i++) {
@ -954,147 +905,6 @@ vnode_pager_input(vnp, m, count, reqpage)
return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}
/*
* old-style vnode pager output routine
*/
int
vnode_pager_output_old(vnp, m)
register vn_pager_t vnp;
vm_page_t m;
{
vm_offset_t kva, kva2;
vm_offset_t size;
struct iovec aiov;
struct uio auio;
struct vnode *vp;
int error;
vp = vnp->vnp_vp;
/*
* Dont return failure if beyond current EOF placate the VM system.
*/
if (m->offset >= vnp->vnp_size) {
return VM_PAGER_OK;
} else {
size = PAGE_SIZE;
if (m->offset + size > vnp->vnp_size)
size = vnp->vnp_size - m->offset;
kva2 = kmem_alloc(pager_map, PAGE_SIZE);
/*
* Allocate a kernel virtual address and initialize so that
* we can use VOP_WRITE routines.
*/
kva = vm_pager_map_page(m);
bcopy((caddr_t) kva, (caddr_t) kva2, size);
vm_pager_unmap_page(kva);
pmap_clear_modify(VM_PAGE_TO_PHYS(m));
PAGE_WAKEUP(m);
aiov.iov_base = (caddr_t) kva2;
aiov.iov_len = size;
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
auio.uio_offset = m->offset;
auio.uio_segflg = UIO_SYSSPACE;
auio.uio_rw = UIO_WRITE;
auio.uio_resid = size;
auio.uio_procp = (struct proc *) 0;
error = VOP_WRITE(vp, &auio, 0, curproc->p_ucred);
kmem_free_wakeup(pager_map, kva2, PAGE_SIZE);
if (!error) {
if ((size - auio.uio_resid) == 0) {
error = EINVAL;
}
}
return error ? VM_PAGER_ERROR : VM_PAGER_OK;
}
}
/*
* vnode pager output on a small-block file system
*/
int
vnode_pager_output_smlfs(vnp, m)
vn_pager_t vnp;
vm_page_t m;
{
int i;
int s;
struct vnode *dp, *vp;
struct buf *bp;
vm_offset_t kva;
int fileaddr;
vm_offset_t bsize;
int error = 0;
vp = vnp->vnp_vp;
bsize = vp->v_mount->mnt_stat.f_iosize;
VOP_BMAP(vp, 0, &dp, 0, 0);
kva = vm_pager_map_page(m);
for (i = 0; !error && i < (PAGE_SIZE / bsize); i++) {
if ((vm_page_bits(m->offset + i * bsize, bsize) & m->valid & m->dirty) == 0)
continue;
/*
* calculate logical block and offset
*/
fileaddr = vnode_pager_addr(vp, m->offset + i * bsize, (int *)0);
if (fileaddr != -1) {
bp = getpbuf();
/* build a minimal buffer header */
bp->b_flags = B_BUSY | B_CALL | B_WRITE;
bp->b_iodone = vnode_pager_iodone;
bp->b_proc = curproc;
bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
if (bp->b_rcred != NOCRED)
crhold(bp->b_rcred);
if (bp->b_wcred != NOCRED)
crhold(bp->b_wcred);
bp->b_un.b_addr = (caddr_t) kva + i * bsize;
bp->b_blkno = fileaddr;
pbgetvp(dp, bp);
++dp->v_numoutput;
/* for NFS */
bp->b_dirtyoff = 0;
bp->b_dirtyend = bsize;
bp->b_bcount = bsize;
bp->b_bufsize = bsize;
/* do the input */
VOP_STRATEGY(bp);
/* we definitely need to be at splbio here */
s = splbio();
while ((bp->b_flags & B_DONE) == 0) {
tsleep((caddr_t) bp, PVM, "vnswrt", 0);
}
splx(s);
if ((bp->b_flags & B_ERROR) != 0)
error = EIO;
vm_page_set_clean(m, i * bsize, bsize);
/*
* free the buffer header back to the swap buffer pool
*/
relpbuf(bp);
}
}
vm_pager_unmap_page(kva);
if (error)
return VM_PAGER_ERROR;
else
return VM_PAGER_OK;
}
/*
* generic vnode pager output routine
*/
@ -1105,177 +915,95 @@ vnode_pager_output(vnp, m, count, rtvals)
int count;
int *rtvals;
{
int i, j;
vm_offset_t kva, foff;
int size;
vm_object_t object;
struct vnode *dp, *vp;
struct buf *bp;
vm_offset_t reqaddr;
int bsize;
int s;
daddr_t block;
int runpg;
int i;
int error = 0;
retryoutput:
object = m[0]->object; /* all vm_page_t items are in same object */
struct vnode *vp;
int maxsize, ncount;
struct uio auio;
struct iovec aiov;
int error;
vp = vnp->vnp_vp;
/*
* Make sure underlying filesystem is still mounted.
*/
if (vp->v_mount == NULL)
return VM_PAGER_FAIL;
bsize = vp->v_mount->mnt_stat.f_iosize;
for (i = 0; i < count; i++)
rtvals[i] = VM_PAGER_AGAIN;
if ((int) m[0]->offset < 0) {
printf("vnode_pager_output: attempt to write meta-data!!! -- 0x%x\n", m[0]->offset);
m[0]->dirty = 0;
rtvals[0] = VM_PAGER_OK;
return VM_PAGER_OK;
printf("vnode_pager_output: attempt to write meta-data!!! -- 0x%x(%x)\n", m[0]->offset, m[0]->dirty);
rtvals[0] = VM_PAGER_BAD;
return VM_PAGER_BAD;
}
VOP_LOCK(vp);
/*
* if the filesystem does not have a bmap, then use the old code
*/
if (VOP_BMAP(vp, (m[0]->offset / bsize), &dp, &block, 0) ||
(block == -1)) {
maxsize = count * PAGE_SIZE;
ncount = count;
rtvals[0] = vnode_pager_output_old(vnp, m[0]);
if( maxsize + m[0]->offset > vnp->vnp_size) {
maxsize = vnp->vnp_size - m[0]->offset;
ncount = (maxsize + PAGE_SIZE - 1) / PAGE_SIZE;
m[0]->dirty = 0;
cnt.v_vnodeout++;
cnt.v_vnodepgsout++;
VOP_UNLOCK(vp);
return rtvals[0];
}
/*
* if the filesystem has a small blocksize, then use the small block
* filesystem output code
*/
if ((bsize < PAGE_SIZE) &&
(vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {
for (i = 0; i < count; i++) {
rtvals[i] = vnode_pager_output_smlfs(vnp, m[i]);
if (rtvals[i] == VM_PAGER_OK) {
pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
if( ncount < count) {
for(i=ncount;i<count;i++) {
rtvals[i] = VM_PAGER_BAD;
}
if( ncount == 0) {
printf("vnode_pager_output: write past end of file: %d, %d\n",
m[0]->offset, vnp->vnp_size);
return rtvals[0];
}
}
cnt.v_vnodeout++;
cnt.v_vnodepgsout += count;
VOP_UNLOCK(vp);
return rtvals[0];
}
for (i = 0; i < count; i++) {
foff = m[i]->offset;
if (foff >= vnp->vnp_size) {
for (j = i; j < count; j++)
rtvals[j] = VM_PAGER_BAD;
count = i;
break;
}
for(i=0;i<count;i++) {
++m[i]->busy;
m[i]->flags &= ~PG_BUSY;
}
if (count == 0) {
VOP_UNLOCK(vp);
return rtvals[0];
}
foff = m[0]->offset;
reqaddr = vnode_pager_addr(vp, foff, &runpg);
if( runpg < count)
count = runpg;
/*
* calculate the size of the transfer
*/
size = count * PAGE_SIZE;
if ((foff + size) > vnp->vnp_size)
size = vnp->vnp_size - foff;
/*
* round up physical size for real devices
*/
if (dp->v_type == VBLK || dp->v_type == VCHR)
size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
bp = getpbuf();
kva = (vm_offset_t) bp->b_data;
/*
* and map the pages to be read into the kva
*/
pmap_qenter(kva, m, count);
/* build a minimal buffer header */
bp->b_flags = B_BUSY | B_WRITE | B_CALL;
bp->b_iodone = vnode_pager_iodone;
/* B_PHYS is not set, but it is nice to fill this in */
bp->b_proc = curproc;
bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
if (bp->b_rcred != NOCRED)
crhold(bp->b_rcred);
if (bp->b_wcred != NOCRED)
crhold(bp->b_wcred);
bp->b_blkno = reqaddr;
pbgetvp(dp, bp);
++dp->v_numoutput;
/* for NFS */
bp->b_dirtyoff = 0;
bp->b_dirtyend = size;
bp->b_bcount = size;
bp->b_bufsize = size;
aiov.iov_base = (caddr_t) 0;
aiov.iov_len = maxsize;
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
auio.uio_offset = m[0]->offset;
auio.uio_segflg = UIO_NOCOPY;
auio.uio_rw = UIO_WRITE;
auio.uio_resid = maxsize;
auio.uio_procp = (struct proc *) 0;
error = VOP_WRITE(vp, &auio, IO_VMIO, curproc->p_ucred);
cnt.v_vnodeout++;
cnt.v_vnodepgsout += count;
cnt.v_vnodepgsout += ncount;
/* do the output */
VOP_STRATEGY(bp);
s = splbio();
/* we definitely need to be at splbio here */
while ((bp->b_flags & B_DONE) == 0) {
tsleep((caddr_t) bp, PVM, "vnwrite", 0);
if( error) {
printf("vnode_pager_output: I/O error %d\n", error);
}
splx(s);
if ((bp->b_flags & B_ERROR) != 0)
error = EIO;
pmap_qremove(kva, count);
/*
* free the buffer header back to the swap buffer pool
*/
relpbuf(bp);
VOP_UNLOCK(vp);
if (!error) {
for (i = 0; i < count; i++) {
pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
m[i]->dirty = 0;
if( auio.uio_resid) {
printf("vnode_pager_output: residual I/O %d at %d\n", auio.uio_resid, m[0]->offset);
}
for(i=0;i < count;i++) {
--m[i]->busy;
if( i < ncount) {
rtvals[i] = VM_PAGER_OK;
}
} else if (count != 1) {
error = 0;
count = 1;
goto retryoutput;
if((m[i]->busy == 0) && (m[i]->flags & PG_WANTED))
wakeup((caddr_t) m[i]);
}
if (error) {
printf("vnode_pager_output: I/O write error\n");
}
return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
return rtvals[0];
}
struct vnode *
vnode_pager_lock(vm_object_t object) {
for(;object;object=object->shadow) {
vn_pager_t vnp;
if( !object->pager || (object->pager->pg_type != PG_VNODE))
continue;
vnp = (vn_pager_t) object->pager->pg_data;
VOP_LOCK(vnp->vnp_vp);
return vnp->vnp_vp;
}
return (struct vnode *)NULL;
}
void
vnode_pager_unlock(struct vnode *vp) {
VOP_UNLOCK(vp);
}