Implement a new totally dynamic (up to MAXPHYS) buffer kva allocation

scheme.  Additionally, add the capability for checking for unexpected
kernel page faults.  The maximum amount of kva space for buffers hasn't
been decreased from where it is, but it will now be possible to do so.

This scheme manages the kva space similar to the buffers themselves.  If
there isn't enough kva space because of usage or fragmentation, buffers
will be reclaimed until a buffer allocation is successful.  This scheme
should be very resistant to fragmentation problems until/if the LFS code
is fixed and uses the bogus buffer locking scheme -- but a 'fixed' LFS
is not likely to use such a scheme.

Now there should be NO problem allocating buffers up to MAXPHYS.
This commit is contained in:
dyson 1996-11-30 22:41:49 +00:00
parent 7c59df49d9
commit 7a58275f33
9 changed files with 135 additions and 54 deletions

View File

@ -18,7 +18,7 @@
* 5. Modifications may be freely made to this file if the above conditions
* are met.
*
* $Id: vfs_bio.c,v 1.105 1996/11/17 02:10:48 dyson Exp $
* $Id: vfs_bio.c,v 1.106 1996/11/28 04:26:04 dyson Exp $
*/
/*
@ -51,6 +51,8 @@
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/lock.h>
#include <vm/vm_map.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/malloc.h>
@ -92,7 +94,6 @@ int vfs_update_wakeup;
/*
* buffers base kva
*/
caddr_t buffers_kva;
/*
* bogus page -- for I/O to/from partially complete buffers
@ -134,7 +135,6 @@ bufinit()
for (i = 0; i < BUFFER_QUEUES; i++)
TAILQ_INIT(&bufqueues[i]);
buffers_kva = (caddr_t) kmem_alloc_pageable(buffer_map, MAXBSIZE * nbuf);
/* finally, initialize each buffer header and stick on empty q */
for (i = 0; i < nbuf; i++) {
bp = &buf[i];
@ -145,7 +145,6 @@ bufinit()
bp->b_wcred = NOCRED;
bp->b_qindex = QUEUE_EMPTY;
bp->b_vnbufs.le_next = NOLIST;
bp->b_data = buffers_kva + i * MAXBSIZE;
TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
LIST_INSERT_HEAD(&invalhash, bp, b_hash);
}
@ -176,6 +175,25 @@ bufinit()
}
/*
* Free the kva allocation for a buffer
* Must be called only at splbio or higher,
* as this is the only locking for buffer_map.
*/
static void
bfreekva(struct buf * bp)
{
if (bp->b_kvasize == 0)
return;
vm_map_delete(buffer_map,
(vm_offset_t) bp->b_kvabase,
(vm_offset_t) bp->b_kvabase + bp->b_kvasize);
bp->b_kvasize = 0;
}
/*
* remove the buffer from the appropriate free list
*/
@ -562,6 +580,10 @@ brelse(struct buf * bp)
LIST_REMOVE(bp, b_hash);
LIST_INSERT_HEAD(&invalhash, bp, b_hash);
bp->b_dev = NODEV;
/*
* Get rid of the kva allocation *now*
*/
bfreekva(bp);
if (needsbuffer) {
wakeup(&needsbuffer);
needsbuffer=0;
@ -724,7 +746,7 @@ vfs_vmio_release(bp)
/*
* Check to see if a block is currently memory resident.
*/
__inline struct buf *
struct buf *
gbincore(struct vnode * vp, daddr_t blkno)
{
struct buf *bp;
@ -812,10 +834,11 @@ vfs_bio_awrite(struct buf * bp)
* Find a buffer header which is available for use.
*/
static struct buf *
getnewbuf(int slpflag, int slptimeo, int doingvmio)
getnewbuf(int slpflag, int slptimeo, int size, int maxsize)
{
struct buf *bp;
int nbyteswritten = 0;
vm_offset_t addr;
start:
if (bufspace >= maxbufspace)
@ -926,15 +949,43 @@ fillbuf:
bp->b_resid = 0;
bp->b_bcount = 0;
bp->b_npages = 0;
bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE;
bp->b_dirtyoff = bp->b_dirtyend = 0;
bp->b_validoff = bp->b_validend = 0;
bp->b_usecount = 4;
if (bufspace >= maxbufspace + nbyteswritten) {
maxsize = (maxsize + PAGE_MASK) & ~PAGE_MASK;
bfreekva(bp);
/*
* See if we have buffer kva space
*/
if (vm_map_findspace(buffer_map, 0, maxsize, &addr)) {
bp->b_flags |= B_INVAL;
brelse(bp);
goto trytofreespace;
}
/*
* See if we are below our allocated minimum
*/
if (bufspace >= (maxbufspace + nbyteswritten)) {
bp->b_flags |= B_INVAL;
brelse(bp);
goto trytofreespace;
}
/*
* create a map entry for the buffer -- in essence
* reserving the kva space.
*/
vm_map_insert(buffer_map, NULL, 0,
addr, addr + maxsize,
VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
bp->b_data = (caddr_t) addr;
bp->b_kvabase = (caddr_t) addr;
bp->b_kvasize = maxsize;
return (bp);
}
@ -1057,6 +1108,18 @@ getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo)
struct buf *bp;
int s;
struct bufhashhdr *bh;
int maxsize;
if (vp->v_mount) {
maxsize = vp->v_mount->mnt_stat.f_iosize;
/*
* This happens on mount points.
*/
if (maxsize < size)
maxsize = size;
} else {
maxsize = size;
}
if (size > MAXBSIZE)
panic("getblk: size(%d) > MAXBSIZE(%d)\n", size, MAXBSIZE);
@ -1086,7 +1149,7 @@ loop:
*/
if (bp->b_bcount != size) {
if (bp->b_flags & B_VMIO) {
if ((bp->b_flags & B_VMIO) && (size <= bp->b_kvasize)) {
allocbuf(bp, size);
} else {
bp->b_flags |= B_NOCACHE;
@ -1101,14 +1164,8 @@ loop:
return (bp);
} else {
vm_object_t obj;
int doingvmio;
if ((obj = vp->v_object) && (vp->v_flag & VVMIO)) {
doingvmio = 1;
} else {
doingvmio = 0;
}
if ((bp = getnewbuf(slpflag, slptimeo, doingvmio)) == 0) {
if ((bp = getnewbuf(slpflag, slptimeo, size, maxsize)) == 0) {
if (slpflag || slptimeo) {
splx(s);
return NULL;
@ -1138,7 +1195,7 @@ loop:
bh = BUFHASH(vp, blkno);
LIST_INSERT_HEAD(bh, bp, b_hash);
if (doingvmio) {
if ((obj = vp->v_object) && (vp->v_flag & VVMIO)) {
bp->b_flags |= (B_VMIO | B_CACHE);
#if defined(VFS_BIO_DEBUG)
if (vp->v_type != VREG && vp->v_type != VBLK)
@ -1171,7 +1228,7 @@ geteblk(int size)
int s;
s = splbio();
while ((bp = getnewbuf(0, 0, 0)) == 0);
while ((bp = getnewbuf(0, 0, size, MAXBSIZE)) == 0);
splx(s);
allocbuf(bp, size);
bp->b_flags |= B_INVAL;
@ -1201,6 +1258,9 @@ allocbuf(struct buf * bp, int size)
if (!(bp->b_flags & B_BUSY))
panic("allocbuf: buffer not busy");
if (bp->b_kvasize < size)
panic("allocbuf: buffer too small");
if ((bp->b_flags & B_VMIO) == 0) {
caddr_t origbuf;
int origbufsize;
@ -1227,7 +1287,7 @@ allocbuf(struct buf * bp, int size)
free(bp->b_data, M_BIOBUF);
bufspace -= bp->b_bufsize;
bufmallocspace -= bp->b_bufsize;
bp->b_data = (caddr_t) buffers_kva + (bp - buf) * MAXBSIZE;
bp->b_data = bp->b_kvabase;
bp->b_bufsize = 0;
bp->b_bcount = 0;
bp->b_flags &= ~B_MALLOC;
@ -1268,7 +1328,7 @@ allocbuf(struct buf * bp, int size)
if (bp->b_flags & B_MALLOC) {
origbuf = bp->b_data;
origbufsize = bp->b_bufsize;
bp->b_data = (caddr_t) buffers_kva + (bp - buf) * MAXBSIZE;
bp->b_data = bp->b_kvabase;
bufspace -= bp->b_bufsize;
bufmallocspace -= bp->b_bufsize;
bp->b_bufsize = 0;

View File

@ -33,7 +33,7 @@
* SUCH DAMAGE.
*
* @(#)vfs_cluster.c 8.7 (Berkeley) 2/13/94
* $Id: vfs_cluster.c,v 1.37 1996/07/27 18:49:18 dyson Exp $
* $Id: vfs_cluster.c,v 1.38 1996/10/06 07:50:04 dyson Exp $
*/
#include <sys/param.h>
@ -385,6 +385,10 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run)
VM_PAGE_BITS_ALL)
bp->b_pages[j] = bogus_page;
}
if (bp->b_bufsize > bp->b_kvasize)
panic("cluster_rbuild: b_bufsize(%d) > b_kvasize(%d)\n",
bp->b_bufsize, bp->b_kvasize);
bp->b_kvasize = bp->b_bufsize;
pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
(vm_page_t *)bp->b_pages, bp->b_npages);
@ -690,6 +694,10 @@ cluster_wbuild(vp, size, start_lbn, len)
}
pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
(vm_page_t *) bp->b_pages, bp->b_npages);
if (bp->b_bufsize > bp->b_kvasize)
panic("cluster_wbuild: b_bufsize(%d) > b_kvasize(%d)\n",
bp->b_bufsize, bp->b_kvasize);
bp->b_kvasize = bp->b_bufsize;
totalwritten += bp->b_bufsize;
bp->b_dirtyoff = 0;
bp->b_dirtyend = bp->b_bufsize;

View File

@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)buf.h 8.9 (Berkeley) 3/30/95
* $Id: buf.h,v 1.33 1996/09/06 05:35:00 gibbs Exp $
* $Id: buf.h,v 1.34 1996/10/13 14:36:37 phk Exp $
*/
#ifndef _SYS_BUF_H_
@ -80,6 +80,8 @@ struct buf {
struct {
caddr_t b_addr; /* Memory, superblocks, indirect etc. */
} b_un;
caddr_t b_kvabase; /* base kva for buffer */
int b_kvasize; /* size of kva for buffer */
void *b_saveaddr; /* Original b_addr for physio. */
daddr_t b_lblkno; /* Logical block number. */
daddr_t b_blkno; /* Underlying physical block number. */

View File

@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)buf.h 8.9 (Berkeley) 3/30/95
* $Id: buf.h,v 1.33 1996/09/06 05:35:00 gibbs Exp $
* $Id: buf.h,v 1.34 1996/10/13 14:36:37 phk Exp $
*/
#ifndef _SYS_BUF_H_
@ -80,6 +80,8 @@ struct buf {
struct {
caddr_t b_addr; /* Memory, superblocks, indirect etc. */
} b_un;
caddr_t b_kvabase; /* base kva for buffer */
int b_kvasize; /* size of kva for buffer */
void *b_saveaddr; /* Original b_addr for physio. */
daddr_t b_lblkno; /* Logical block number. */
daddr_t b_blkno; /* Underlying physical block number. */

View File

@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)param.h 8.3 (Berkeley) 4/4/95
* $Id: param.h,v 1.15 1996/05/02 14:21:03 phk Exp $
* $Id: param.h,v 1.16 1996/11/28 04:07:44 dyson Exp $
*/
#ifndef _SYS_PARAM_H_
@ -146,11 +146,8 @@
* it smaller may make some file systems unmountable. Also, MAXBSIZE
* must be less than MAXPHYS!!!
*/
#if defined(MSDOSFS)
#define MAXBSIZE 32768
#else
#define MAXBSIZE 16384
#endif
#define MAXBSIZE 65536
#define DFLTBSIZE 8192
#define MAXFRAG 8
/*

View File

@ -66,7 +66,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
* $Id: vm_fault.c,v 1.56 1996/07/30 03:08:07 dyson Exp $
* $Id: vm_fault.c,v 1.57 1996/09/08 20:44:37 dyson Exp $
*/
/*
@ -81,6 +81,7 @@
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/vmmeter.h>
#include <sys/buf.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@ -103,10 +104,6 @@ int vm_fault_additional_pages __P((vm_page_t, int, int, vm_page_t *, int *));
#define VM_FAULT_READ_BEHIND 3
#define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1)
int vm_fault_free_1;
int vm_fault_copy_save_1;
int vm_fault_copy_save_2;
/*
* vm_fault:
*
@ -200,6 +197,11 @@ RetryFault:;
return (result);
}
if (entry->nofault) {
panic("vm_fault: fault on nofault entry, addr: %lx",
vaddr);
}
vp = vnode_pager_lock(first_object);
lookup_still_valid = TRUE;
@ -565,7 +567,6 @@ readrest:
first_m = m;
m->dirty = VM_PAGE_BITS_ALL;
m = NULL;
++vm_fault_copy_save_1;
} else {
/*
* Oh, well, lets copy it.
@ -639,7 +640,6 @@ readrest:
PAGE_WAKEUP(m);
vm_page_free(m);
m = NULL;
++vm_fault_free_1;
tm->dirty = VM_PAGE_BITS_ALL;
first_m->dirty = VM_PAGE_BITS_ALL;
}
@ -651,7 +651,6 @@ readrest:
vm_page_rename(m, other_object, other_pindex);
m->dirty = VM_PAGE_BITS_ALL;
m->valid = VM_PAGE_BITS_ALL;
++vm_fault_copy_save_2;
}
}
}

View File

@ -61,7 +61,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
* $Id: vm_map.c,v 1.56 1996/09/08 23:49:47 dyson Exp $
* $Id: vm_map.c,v 1.57 1996/09/14 11:54:55 bde Exp $
*/
/*
@ -689,6 +689,11 @@ vm_map_insert(map, object, offset, start, end, prot, max, cow)
else
new_entry->copy_on_write = FALSE;
if (cow & MAP_NOFAULT)
new_entry->nofault = TRUE;
else
new_entry->nofault = FALSE;
if (map->is_main_map) {
new_entry->inheritance = VM_INHERIT_DEFAULT;
new_entry->protection = prot;

View File

@ -61,7 +61,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
* $Id: vm_map.h,v 1.14 1996/07/27 03:23:59 dyson Exp $
* $Id: vm_map.h,v 1.15 1996/07/30 03:08:11 dyson Exp $
*/
/*
@ -106,9 +106,9 @@ struct vm_map_entry {
vm_ooffset_t offset; /* offset into object */
boolean_t is_a_map:1, /* Is "object" a map? */
is_sub_map:1, /* Is "object" a submap? */
/* Only in sharing maps: */
copy_on_write:1, /* is data copy-on-write */
needs_copy:1; /* does object need to be copied */
needs_copy:1, /* does object need to be copied */
nofault:1; /* should never fault */
/* Only in task maps: */
vm_prot_t protection; /* protection code */
vm_prot_t max_protection; /* maximum protection */
@ -208,6 +208,7 @@ typedef struct {
*/
#define MAP_COPY_NEEDED 0x1
#define MAP_COPY_ON_WRITE 0x2
#define MAP_NOFAULT 0x4
#ifdef KERNEL
extern vm_offset_t kentry_data;

View File

@ -61,7 +61,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
* $Id: vm_pager.c,v 1.23 1996/05/18 03:38:05 dyson Exp $
* $Id: vm_pager.c,v 1.24 1996/09/08 20:44:49 dyson Exp $
*/
/*
@ -277,6 +277,22 @@ pager_cache(object, should_cache)
return (KERN_SUCCESS);
}
/*
 * Put a physical buffer header into its initial state.
 *
 * The whole header is zeroed first.  The kva fields are then pointed at
 * this header's MAXPHYS-sized slot, located at swapbkva plus MAXPHYS
 * times the header's index within swbuf; b_data starts out equal to
 * b_kvabase.  Finally the queue index is set to QUEUE_NONE, the
 * b_vnbufs link to NOLIST, and both credential fields to NOCRED.
 */
static void
initpbuf(struct buf *bp) {
	bzero(bp, sizeof *bp);

	/* kva slot belonging to this header */
	bp->b_kvasize = MAXPHYS;
	bp->b_kvabase = (caddr_t) (MAXPHYS * (bp - swbuf)) + swapbkva;
	bp->b_data = bp->b_kvabase;

	/* bookkeeping fields that must not stay zero */
	bp->b_qindex = QUEUE_NONE;
	bp->b_vnbufs.le_next = NOLIST;
	bp->b_wcred = NOCRED;
	bp->b_rcred = NOCRED;
}
/*
* allocate a physical buffer
*/
@ -295,12 +311,7 @@ getpbuf()
TAILQ_REMOVE(&bswlist, bp, b_freelist);
splx(s);
bzero(bp, sizeof *bp);
bp->b_rcred = NOCRED;
bp->b_wcred = NOCRED;
bp->b_qindex = QUEUE_NONE;
bp->b_data = (caddr_t) (MAXPHYS * (bp - swbuf)) + swapbkva;
bp->b_vnbufs.le_next = NOLIST;
initpbuf(bp);
return bp;
}
@ -321,12 +332,8 @@ trypbuf()
TAILQ_REMOVE(&bswlist, bp, b_freelist);
splx(s);
bzero(bp, sizeof *bp);
bp->b_rcred = NOCRED;
bp->b_wcred = NOCRED;
bp->b_qindex = QUEUE_NONE;
bp->b_data = (caddr_t) (MAXPHYS * (bp - swbuf)) + swapbkva;
bp->b_vnbufs.le_next = NOLIST;
initpbuf(bp);
return bp;
}