Implement a new totally dynamic (up to MAXPHYS) buffer kva allocation
scheme. Additionally, add the capability for checking for unexpected kernel page faults. The maximum amount of kva space for buffers hasn't been decreased from where it is, but it will now be possible to do so. This scheme manages the kva space similarly to the buffers themselves. If there isn't enough kva space because of usage or fragmentation, buffers will be reclaimed until a buffer allocation is successful. This scheme should be very resistant to fragmentation problems until/if the LFS code is fixed and uses the bogus buffer locking scheme -- but a 'fixed' LFS is not likely to use such a scheme. Now there should be NO problem allocating buffers up to MAXPHYS.
This commit is contained in:
parent
7c59df49d9
commit
7a58275f33
@ -18,7 +18,7 @@
|
||||
* 5. Modifications may be freely made to this file if the above conditions
|
||||
* are met.
|
||||
*
|
||||
* $Id: vfs_bio.c,v 1.105 1996/11/17 02:10:48 dyson Exp $
|
||||
* $Id: vfs_bio.c,v 1.106 1996/11/28 04:26:04 dyson Exp $
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -51,6 +51,8 @@
|
||||
#include <vm/vm_page.h>
|
||||
#include <vm/vm_object.h>
|
||||
#include <vm/vm_extern.h>
|
||||
#include <vm/lock.h>
|
||||
#include <vm/vm_map.h>
|
||||
#include <sys/buf.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/malloc.h>
|
||||
@ -92,7 +94,6 @@ int vfs_update_wakeup;
|
||||
/*
|
||||
* buffers base kva
|
||||
*/
|
||||
caddr_t buffers_kva;
|
||||
|
||||
/*
|
||||
* bogus page -- for I/O to/from partially complete buffers
|
||||
@ -134,7 +135,6 @@ bufinit()
|
||||
for (i = 0; i < BUFFER_QUEUES; i++)
|
||||
TAILQ_INIT(&bufqueues[i]);
|
||||
|
||||
buffers_kva = (caddr_t) kmem_alloc_pageable(buffer_map, MAXBSIZE * nbuf);
|
||||
/* finally, initialize each buffer header and stick on empty q */
|
||||
for (i = 0; i < nbuf; i++) {
|
||||
bp = &buf[i];
|
||||
@ -145,7 +145,6 @@ bufinit()
|
||||
bp->b_wcred = NOCRED;
|
||||
bp->b_qindex = QUEUE_EMPTY;
|
||||
bp->b_vnbufs.le_next = NOLIST;
|
||||
bp->b_data = buffers_kva + i * MAXBSIZE;
|
||||
TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
|
||||
LIST_INSERT_HEAD(&invalhash, bp, b_hash);
|
||||
}
|
||||
@ -176,6 +175,25 @@ bufinit()
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* bfreekva: free the kva allocation for a buffer.
|
||||
*
|
||||
* If bp->b_kvasize is 0 the buffer holds no kva and nothing is done.
|
||||
* Otherwise the range [b_kvabase, b_kvabase + b_kvasize) is deleted
|
||||
* from buffer_map and b_kvasize is reset to 0.  b_kvabase itself is
|
||||
* not modified here.
|
||||
*
|
||||
* Must be called only at splbio or higher,
|
||||
|
|
||||
* as this is the only locking for buffer_map.
|
||||
*/
|
||||
static void
|
||||
bfreekva(struct buf * bp)
|
||||
{
|
||||
/* no kva recorded for this buffer -- nothing to release */
|
||||
if (bp->b_kvasize == 0)
|
||||
return;
|
||||
|
||||
/* drop the map entry covering this buffer's kva range */
|
||||
vm_map_delete(buffer_map,
|
||||
(vm_offset_t) bp->b_kvabase,
|
||||
(vm_offset_t) bp->b_kvabase + bp->b_kvasize);
|
||||
|
||||
/* a size of 0 marks the buffer as owning no kva (see check above) */
|
||||
bp->b_kvasize = 0;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* remove the buffer from the appropriate free list
|
||||
*/
|
||||
@ -562,6 +580,10 @@ brelse(struct buf * bp)
|
||||
LIST_REMOVE(bp, b_hash);
|
||||
LIST_INSERT_HEAD(&invalhash, bp, b_hash);
|
||||
bp->b_dev = NODEV;
|
||||
/*
|
||||
* Get rid of the kva allocation *now*
|
||||
*/
|
||||
bfreekva(bp);
|
||||
if (needsbuffer) {
|
||||
wakeup(&needsbuffer);
|
||||
needsbuffer=0;
|
||||
@ -724,7 +746,7 @@ vfs_vmio_release(bp)
|
||||
/*
|
||||
* Check to see if a block is currently memory resident.
|
||||
*/
|
||||
__inline struct buf *
|
||||
struct buf *
|
||||
gbincore(struct vnode * vp, daddr_t blkno)
|
||||
{
|
||||
struct buf *bp;
|
||||
@ -812,10 +834,11 @@ vfs_bio_awrite(struct buf * bp)
|
||||
* Find a buffer header which is available for use.
|
||||
*/
|
||||
static struct buf *
|
||||
getnewbuf(int slpflag, int slptimeo, int doingvmio)
|
||||
getnewbuf(int slpflag, int slptimeo, int size, int maxsize)
|
||||
{
|
||||
struct buf *bp;
|
||||
int nbyteswritten = 0;
|
||||
vm_offset_t addr;
|
||||
|
||||
start:
|
||||
if (bufspace >= maxbufspace)
|
||||
@ -926,15 +949,43 @@ fillbuf:
|
||||
bp->b_resid = 0;
|
||||
bp->b_bcount = 0;
|
||||
bp->b_npages = 0;
|
||||
bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE;
|
||||
bp->b_dirtyoff = bp->b_dirtyend = 0;
|
||||
bp->b_validoff = bp->b_validend = 0;
|
||||
bp->b_usecount = 4;
|
||||
if (bufspace >= maxbufspace + nbyteswritten) {
|
||||
|
||||
maxsize = (maxsize + PAGE_MASK) & ~PAGE_MASK;
|
||||
bfreekva(bp);
|
||||
|
||||
/*
|
||||
* See if we have buffer kva space
|
||||
*/
|
||||
if (vm_map_findspace(buffer_map, 0, maxsize, &addr)) {
|
||||
bp->b_flags |= B_INVAL;
|
||||
brelse(bp);
|
||||
goto trytofreespace;
|
||||
}
|
||||
|
||||
/*
|
||||
* See if we are below our allocated minimum
|
||||
*/
|
||||
if (bufspace >= (maxbufspace + nbyteswritten)) {
|
||||
bp->b_flags |= B_INVAL;
|
||||
brelse(bp);
|
||||
goto trytofreespace;
|
||||
}
|
||||
|
||||
/*
|
||||
* create a map entry for the buffer -- in essence
|
||||
* reserving the kva space.
|
||||
*/
|
||||
vm_map_insert(buffer_map, NULL, 0,
|
||||
addr, addr + maxsize,
|
||||
VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
|
||||
|
||||
bp->b_data = (caddr_t) addr;
|
||||
bp->b_kvabase = (caddr_t) addr;
|
||||
bp->b_kvasize = maxsize;
|
||||
|
||||
return (bp);
|
||||
}
|
||||
|
||||
@ -1057,6 +1108,18 @@ getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo)
|
||||
struct buf *bp;
|
||||
int s;
|
||||
struct bufhashhdr *bh;
|
||||
int maxsize;
|
||||
|
||||
if (vp->v_mount) {
|
||||
maxsize = vp->v_mount->mnt_stat.f_iosize;
|
||||
/*
|
||||
* This happens on mount points.
|
||||
*/
|
||||
if (maxsize < size)
|
||||
maxsize = size;
|
||||
} else {
|
||||
maxsize = size;
|
||||
}
|
||||
|
||||
if (size > MAXBSIZE)
|
||||
panic("getblk: size(%d) > MAXBSIZE(%d)\n", size, MAXBSIZE);
|
||||
@ -1086,7 +1149,7 @@ loop:
|
||||
*/
|
||||
|
||||
if (bp->b_bcount != size) {
|
||||
if (bp->b_flags & B_VMIO) {
|
||||
if ((bp->b_flags & B_VMIO) && (size <= bp->b_kvasize)) {
|
||||
allocbuf(bp, size);
|
||||
} else {
|
||||
bp->b_flags |= B_NOCACHE;
|
||||
@ -1101,14 +1164,8 @@ loop:
|
||||
return (bp);
|
||||
} else {
|
||||
vm_object_t obj;
|
||||
int doingvmio;
|
||||
|
||||
if ((obj = vp->v_object) && (vp->v_flag & VVMIO)) {
|
||||
doingvmio = 1;
|
||||
} else {
|
||||
doingvmio = 0;
|
||||
}
|
||||
if ((bp = getnewbuf(slpflag, slptimeo, doingvmio)) == 0) {
|
||||
if ((bp = getnewbuf(slpflag, slptimeo, size, maxsize)) == 0) {
|
||||
if (slpflag || slptimeo) {
|
||||
splx(s);
|
||||
return NULL;
|
||||
@ -1138,7 +1195,7 @@ loop:
|
||||
bh = BUFHASH(vp, blkno);
|
||||
LIST_INSERT_HEAD(bh, bp, b_hash);
|
||||
|
||||
if (doingvmio) {
|
||||
if ((obj = vp->v_object) && (vp->v_flag & VVMIO)) {
|
||||
bp->b_flags |= (B_VMIO | B_CACHE);
|
||||
#if defined(VFS_BIO_DEBUG)
|
||||
if (vp->v_type != VREG && vp->v_type != VBLK)
|
||||
@ -1171,7 +1228,7 @@ geteblk(int size)
|
||||
int s;
|
||||
|
||||
s = splbio();
|
||||
while ((bp = getnewbuf(0, 0, 0)) == 0);
|
||||
while ((bp = getnewbuf(0, 0, size, MAXBSIZE)) == 0);
|
||||
splx(s);
|
||||
allocbuf(bp, size);
|
||||
bp->b_flags |= B_INVAL;
|
||||
@ -1201,6 +1258,9 @@ allocbuf(struct buf * bp, int size)
|
||||
if (!(bp->b_flags & B_BUSY))
|
||||
panic("allocbuf: buffer not busy");
|
||||
|
||||
if (bp->b_kvasize < size)
|
||||
panic("allocbuf: buffer too small");
|
||||
|
||||
if ((bp->b_flags & B_VMIO) == 0) {
|
||||
caddr_t origbuf;
|
||||
int origbufsize;
|
||||
@ -1227,7 +1287,7 @@ allocbuf(struct buf * bp, int size)
|
||||
free(bp->b_data, M_BIOBUF);
|
||||
bufspace -= bp->b_bufsize;
|
||||
bufmallocspace -= bp->b_bufsize;
|
||||
bp->b_data = (caddr_t) buffers_kva + (bp - buf) * MAXBSIZE;
|
||||
bp->b_data = bp->b_kvabase;
|
||||
bp->b_bufsize = 0;
|
||||
bp->b_bcount = 0;
|
||||
bp->b_flags &= ~B_MALLOC;
|
||||
@ -1268,7 +1328,7 @@ allocbuf(struct buf * bp, int size)
|
||||
if (bp->b_flags & B_MALLOC) {
|
||||
origbuf = bp->b_data;
|
||||
origbufsize = bp->b_bufsize;
|
||||
bp->b_data = (caddr_t) buffers_kva + (bp - buf) * MAXBSIZE;
|
||||
bp->b_data = bp->b_kvabase;
|
||||
bufspace -= bp->b_bufsize;
|
||||
bufmallocspace -= bp->b_bufsize;
|
||||
bp->b_bufsize = 0;
|
||||
|
@ -33,7 +33,7 @@
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)vfs_cluster.c 8.7 (Berkeley) 2/13/94
|
||||
* $Id: vfs_cluster.c,v 1.37 1996/07/27 18:49:18 dyson Exp $
|
||||
* $Id: vfs_cluster.c,v 1.38 1996/10/06 07:50:04 dyson Exp $
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
@ -385,6 +385,10 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run)
|
||||
VM_PAGE_BITS_ALL)
|
||||
bp->b_pages[j] = bogus_page;
|
||||
}
|
||||
if (bp->b_bufsize > bp->b_kvasize)
|
||||
panic("cluster_rbuild: b_bufsize(%d) > b_kvasize(%d)\n",
|
||||
bp->b_bufsize, bp->b_kvasize);
|
||||
bp->b_kvasize = bp->b_bufsize;
|
||||
|
||||
pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
|
||||
(vm_page_t *)bp->b_pages, bp->b_npages);
|
||||
@ -690,6 +694,10 @@ cluster_wbuild(vp, size, start_lbn, len)
|
||||
}
|
||||
pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
|
||||
(vm_page_t *) bp->b_pages, bp->b_npages);
|
||||
if (bp->b_bufsize > bp->b_kvasize)
|
||||
panic("cluster_wbuild: b_bufsize(%d) > b_kvasize(%d)\n",
|
||||
bp->b_bufsize, bp->b_kvasize);
|
||||
bp->b_kvasize = bp->b_bufsize;
|
||||
totalwritten += bp->b_bufsize;
|
||||
bp->b_dirtyoff = 0;
|
||||
bp->b_dirtyend = bp->b_bufsize;
|
||||
|
@ -36,7 +36,7 @@
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)buf.h 8.9 (Berkeley) 3/30/95
|
||||
* $Id: buf.h,v 1.33 1996/09/06 05:35:00 gibbs Exp $
|
||||
* $Id: buf.h,v 1.34 1996/10/13 14:36:37 phk Exp $
|
||||
*/
|
||||
|
||||
#ifndef _SYS_BUF_H_
|
||||
@ -80,6 +80,8 @@ struct buf {
|
||||
struct {
|
||||
caddr_t b_addr; /* Memory, superblocks, indirect etc. */
|
||||
} b_un;
|
||||
caddr_t b_kvabase; /* base kva for buffer */
|
||||
int b_kvasize; /* size of kva for buffer */
|
||||
void *b_saveaddr; /* Original b_addr for physio. */
|
||||
daddr_t b_lblkno; /* Logical block number. */
|
||||
daddr_t b_blkno; /* Underlying physical block number. */
|
||||
|
@ -36,7 +36,7 @@
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)buf.h 8.9 (Berkeley) 3/30/95
|
||||
* $Id: buf.h,v 1.33 1996/09/06 05:35:00 gibbs Exp $
|
||||
* $Id: buf.h,v 1.34 1996/10/13 14:36:37 phk Exp $
|
||||
*/
|
||||
|
||||
#ifndef _SYS_BUF_H_
|
||||
@ -80,6 +80,8 @@ struct buf {
|
||||
struct {
|
||||
caddr_t b_addr; /* Memory, superblocks, indirect etc. */
|
||||
} b_un;
|
||||
caddr_t b_kvabase; /* base kva for buffer */
|
||||
int b_kvasize; /* size of kva for buffer */
|
||||
void *b_saveaddr; /* Original b_addr for physio. */
|
||||
daddr_t b_lblkno; /* Logical block number. */
|
||||
daddr_t b_blkno; /* Underlying physical block number. */
|
||||
|
@ -36,7 +36,7 @@
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)param.h 8.3 (Berkeley) 4/4/95
|
||||
* $Id: param.h,v 1.15 1996/05/02 14:21:03 phk Exp $
|
||||
* $Id: param.h,v 1.16 1996/11/28 04:07:44 dyson Exp $
|
||||
*/
|
||||
|
||||
#ifndef _SYS_PARAM_H_
|
||||
@ -146,11 +146,8 @@
|
||||
* it smaller may make some file systems unmountable. Also, MAXBSIZE
|
||||
* must be less than MAXPHYS!!!
|
||||
*/
|
||||
#if defined(MSDOSFS)
|
||||
#define MAXBSIZE 32768
|
||||
#else
|
||||
#define MAXBSIZE 16384
|
||||
#endif
|
||||
#define MAXBSIZE 65536
|
||||
#define DFLTBSIZE 8192
|
||||
#define MAXFRAG 8
|
||||
|
||||
/*
|
||||
|
@ -66,7 +66,7 @@
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*
|
||||
* $Id: vm_fault.c,v 1.56 1996/07/30 03:08:07 dyson Exp $
|
||||
* $Id: vm_fault.c,v 1.57 1996/09/08 20:44:37 dyson Exp $
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -81,6 +81,7 @@
|
||||
#include <sys/signalvar.h>
|
||||
#include <sys/resourcevar.h>
|
||||
#include <sys/vmmeter.h>
|
||||
#include <sys/buf.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/vm_param.h>
|
||||
@ -103,10 +104,6 @@ int vm_fault_additional_pages __P((vm_page_t, int, int, vm_page_t *, int *));
|
||||
#define VM_FAULT_READ_BEHIND 3
|
||||
#define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1)
|
||||
|
||||
int vm_fault_free_1;
|
||||
int vm_fault_copy_save_1;
|
||||
int vm_fault_copy_save_2;
|
||||
|
||||
/*
|
||||
* vm_fault:
|
||||
*
|
||||
@ -200,6 +197,11 @@ RetryFault:;
|
||||
return (result);
|
||||
}
|
||||
|
||||
if (entry->nofault) {
|
||||
panic("vm_fault: fault on nofault entry, addr: %lx",
|
||||
vaddr);
|
||||
}
|
||||
|
||||
vp = vnode_pager_lock(first_object);
|
||||
|
||||
lookup_still_valid = TRUE;
|
||||
@ -565,7 +567,6 @@ readrest:
|
||||
first_m = m;
|
||||
m->dirty = VM_PAGE_BITS_ALL;
|
||||
m = NULL;
|
||||
++vm_fault_copy_save_1;
|
||||
} else {
|
||||
/*
|
||||
* Oh, well, lets copy it.
|
||||
@ -639,7 +640,6 @@ readrest:
|
||||
PAGE_WAKEUP(m);
|
||||
vm_page_free(m);
|
||||
m = NULL;
|
||||
++vm_fault_free_1;
|
||||
tm->dirty = VM_PAGE_BITS_ALL;
|
||||
first_m->dirty = VM_PAGE_BITS_ALL;
|
||||
}
|
||||
@ -651,7 +651,6 @@ readrest:
|
||||
vm_page_rename(m, other_object, other_pindex);
|
||||
m->dirty = VM_PAGE_BITS_ALL;
|
||||
m->valid = VM_PAGE_BITS_ALL;
|
||||
++vm_fault_copy_save_2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -61,7 +61,7 @@
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*
|
||||
* $Id: vm_map.c,v 1.56 1996/09/08 23:49:47 dyson Exp $
|
||||
* $Id: vm_map.c,v 1.57 1996/09/14 11:54:55 bde Exp $
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -689,6 +689,11 @@ vm_map_insert(map, object, offset, start, end, prot, max, cow)
|
||||
else
|
||||
new_entry->copy_on_write = FALSE;
|
||||
|
||||
if (cow & MAP_NOFAULT)
|
||||
new_entry->nofault = TRUE;
|
||||
else
|
||||
new_entry->nofault = FALSE;
|
||||
|
||||
if (map->is_main_map) {
|
||||
new_entry->inheritance = VM_INHERIT_DEFAULT;
|
||||
new_entry->protection = prot;
|
||||
|
@ -61,7 +61,7 @@
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*
|
||||
* $Id: vm_map.h,v 1.14 1996/07/27 03:23:59 dyson Exp $
|
||||
* $Id: vm_map.h,v 1.15 1996/07/30 03:08:11 dyson Exp $
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -106,9 +106,9 @@ struct vm_map_entry {
|
||||
vm_ooffset_t offset; /* offset into object */
|
||||
boolean_t is_a_map:1, /* Is "object" a map? */
|
||||
is_sub_map:1, /* Is "object" a submap? */
|
||||
/* Only in sharing maps: */
|
||||
copy_on_write:1, /* is data copy-on-write */
|
||||
needs_copy:1; /* does object need to be copied */
|
||||
needs_copy:1, /* does object need to be copied */
|
||||
nofault:1; /* should never fault */
|
||||
/* Only in task maps: */
|
||||
vm_prot_t protection; /* protection code */
|
||||
vm_prot_t max_protection; /* maximum protection */
|
||||
@ -208,6 +208,7 @@ typedef struct {
|
||||
*/
|
||||
#define MAP_COPY_NEEDED 0x1
|
||||
#define MAP_COPY_ON_WRITE 0x2
|
||||
#define MAP_NOFAULT 0x4
|
||||
|
||||
#ifdef KERNEL
|
||||
extern vm_offset_t kentry_data;
|
||||
|
@ -61,7 +61,7 @@
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*
|
||||
* $Id: vm_pager.c,v 1.23 1996/05/18 03:38:05 dyson Exp $
|
||||
* $Id: vm_pager.c,v 1.24 1996/09/08 20:44:49 dyson Exp $
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -277,6 +277,22 @@ pager_cache(object, should_cache)
|
||||
return (KERN_SUCCESS);
|
||||
}
|
||||
|
||||
/*
|
||||
* initpbuf: initialize a physical buffer
|
||||
*
|
||||
* Zeroes the whole buffer header, then sets both credentials to
|
||||
* NOCRED, the queue index to QUEUE_NONE and the vnode-list link to
|
||||
* NOLIST.  The data pointer is set to swapbkva plus MAXPHYS bytes for
|
||||
* each header that precedes bp in swbuf, and the same address and a
|
||||
* size of MAXPHYS are recorded in b_kvabase / b_kvasize.
|
||||
*/
|
||||
|
|
||||
static void
|
||||
initpbuf(struct buf *bp) {
|
||||
/* start from an all-zero header; every field set below is explicit */
|
||||
bzero(bp, sizeof *bp);
|
||||
bp->b_rcred = NOCRED;
|
||||
bp->b_wcred = NOCRED;
|
||||
bp->b_qindex = QUEUE_NONE;
|
||||
/* MAXPHYS-sized slot in swapbkva, selected by bp's index within swbuf */
|
||||
bp->b_data = (caddr_t) (MAXPHYS * (bp - swbuf)) + swapbkva;
|
||||
/* record the slot as this buffer's kva base and size */
|
||||
bp->b_kvabase = bp->b_data;
|
||||
bp->b_kvasize = MAXPHYS;
|
||||
bp->b_vnbufs.le_next = NOLIST;
|
||||
}
|
||||
|
||||
/*
|
||||
* allocate a physical buffer
|
||||
*/
|
||||
@ -295,12 +311,7 @@ getpbuf()
|
||||
TAILQ_REMOVE(&bswlist, bp, b_freelist);
|
||||
splx(s);
|
||||
|
||||
bzero(bp, sizeof *bp);
|
||||
bp->b_rcred = NOCRED;
|
||||
bp->b_wcred = NOCRED;
|
||||
bp->b_qindex = QUEUE_NONE;
|
||||
bp->b_data = (caddr_t) (MAXPHYS * (bp - swbuf)) + swapbkva;
|
||||
bp->b_vnbufs.le_next = NOLIST;
|
||||
initpbuf(bp);
|
||||
return bp;
|
||||
}
|
||||
|
||||
@ -321,12 +332,8 @@ trypbuf()
|
||||
TAILQ_REMOVE(&bswlist, bp, b_freelist);
|
||||
splx(s);
|
||||
|
||||
bzero(bp, sizeof *bp);
|
||||
bp->b_rcred = NOCRED;
|
||||
bp->b_wcred = NOCRED;
|
||||
bp->b_qindex = QUEUE_NONE;
|
||||
bp->b_data = (caddr_t) (MAXPHYS * (bp - swbuf)) + swapbkva;
|
||||
bp->b_vnbufs.le_next = NOLIST;
|
||||
initpbuf(bp);
|
||||
|
||||
return bp;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user