Adjust some variables (mostly related to the buffer cache) that hold
address space sizes to be longs instead of ints.  Specifically, the
following values are now longs: runningbufspace, bufspace, maxbufspace,
bufmallocspace, maxbufmallocspace, lobufspace, hibufspace,
lorunningspace, hirunningspace, maxswzone, maxbcache, and maxpipekva.
Previously, a relatively small number (~44000) of buffers set in
kern.nbuf would cause integer overflows, resulting either in hangs or
in bogus values for hidirtybuffers and lodirtybuffers.  Now one has to
overflow a long to see such problems.  The auto-tuning of nbuf already
contained a check for an nbuf setting that would cause overflows; I've
changed it to always check and cap nbuf, but to warn if a user-supplied
tunable would cause overflow.
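
As a concrete illustration of the old failure mode, here is a minimal
userland sketch (not the kernel code itself) of the arithmetic that used
to go wrong; it assumes the stock BKVASIZE of 16384 bytes and mirrors
the 3/4-of-buffer-space sizing used for hidirtybuffers:

    #include <stdio.h>

    #define BKVASIZE 16384      /* assumed: the stock FreeBSD value */

    int
    main(void)
    {
        int nbuf = 44000;       /* a modest kern.nbuf setting */

        /* 44000 * 16384 = 720,896,000 still fits in a 32-bit int... */
        int bufspace32 = nbuf * BKVASIZE;

        /*
         * ...but 3 * bufspace32 = 2,162,688,000 exceeds INT_MAX, so it
         * wraps negative on two's-complement machines (formally
         * undefined behavior), which is how hidirtybuffers went bogus.
         */
        printf("int:  %d\n", 3 * bufspace32 / 4);

        /* With longs (64-bit on LP64), the same expression is fine. */
        long bufspace = (long)nbuf * BKVASIZE;
        printf("long: %ld\n", 3 * bufspace / 4);
        return (0);
    }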

Note that this changes the ABI of several sysctls that are used by
things like top(1), etc., so any MFC would probably require some gross
shims to allow for that.
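
For instance, a monitoring tool that reads vfs.runningbufspace into an
int-sized buffer breaks once the kernel exports a long on LP64.  This
is a hedged sketch of generic sysctl(3) usage, not top(1)'s actual code:

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
        int oldval;             /* the pre-change consumer's view */
        size_t len = sizeof(oldval);

        /*
         * On LP64 the node is now 8 bytes wide, so a 4-byte buffer is
         * too small and sysctlbyname(3) fails with ENOMEM.
         */
        if (sysctlbyname("vfs.runningbufspace", &oldval, &len, NULL, 0) == -1)
            printf("int-sized read fails: %s\n", strerror(errno));

        long newval;            /* post-change consumers must use a long */
        len = sizeof(newval);
        if (sysctlbyname("vfs.runningbufspace", &newval, &len, NULL, 0) == 0)
            printf("vfs.runningbufspace: %ld\n", newval);
        return (0);
    }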

MFC after:	1 month
John Baldwin 2009-03-09 19:35:20 +00:00
parent f3106cde19
commit 5bd65606f4
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=189595
9 changed files with 60 additions and 53 deletions

--- a/sys/kern/subr_param.c
+++ b/sys/kern/subr_param.c

@@ -89,9 +89,9 @@ int maxfilesperproc; /* per-proc open files limit */
 int ncallout;           /* maximum # of timer events */
 int nbuf;
 int nswbuf;
-int maxswzone;          /* max swmeta KVA storage */
-int maxbcache;          /* max buffer cache KVA storage */
-int maxpipekva;         /* Limit on pipe KVA */
+long maxswzone;         /* max swmeta KVA storage */
+long maxbcache;         /* max buffer cache KVA storage */
+u_long maxpipekva;      /* Limit on pipe KVA */
 int vm_guest;           /* Running as virtual machine guest? */
 u_long maxtsiz;         /* max text size */
 u_long dfldsiz;         /* initial data size limit */
@@ -203,11 +203,11 @@ init_param1(void)
 #ifdef VM_SWZONE_SIZE_MAX
         maxswzone = VM_SWZONE_SIZE_MAX;
 #endif
-        TUNABLE_INT_FETCH("kern.maxswzone", &maxswzone);
+        TUNABLE_LONG_FETCH("kern.maxswzone", &maxswzone);
 #ifdef VM_BCACHE_SIZE_MAX
         maxbcache = VM_BCACHE_SIZE_MAX;
 #endif
-        TUNABLE_INT_FETCH("kern.maxbcache", &maxbcache);
+        TUNABLE_LONG_FETCH("kern.maxbcache", &maxbcache);
 
         maxtsiz = MAXTSIZ;
         TUNABLE_ULONG_FETCH("kern.maxtsiz", &maxtsiz);
@@ -282,7 +282,7 @@ init_param3(long kmempages)
         maxpipekva = (kmempages / 20) * PAGE_SIZE;
         if (maxpipekva < 512 * 1024)
                 maxpipekva = 512 * 1024;
-        TUNABLE_INT_FETCH("kern.ipc.maxpipekva", &maxpipekva);
+        TUNABLE_ULONG_FETCH("kern.ipc.maxpipekva", &maxpipekva);
 }
 
 /*

--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c

@@ -184,7 +184,7 @@ static int pipeallocfail;
 static int piperesizefail;
 static int piperesizeallowed = 1;
 
-SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN,
+SYSCTL_ULONG(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN,
     &maxpipekva, 0, "Pipe KVA limit");
 SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
     &amountpipekva, 0, "Pipe KVA usage");

--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c

@@ -112,26 +112,26 @@ static void bremfreel(struct buf *bp);
 int vmiodirenable = TRUE;
 SYSCTL_INT(_vfs, OID_AUTO, vmiodirenable, CTLFLAG_RW, &vmiodirenable, 0,
     "Use the VM system for directory writes");
-int runningbufspace;
-SYSCTL_INT(_vfs, OID_AUTO, runningbufspace, CTLFLAG_RD, &runningbufspace, 0,
+long runningbufspace;
+SYSCTL_LONG(_vfs, OID_AUTO, runningbufspace, CTLFLAG_RD, &runningbufspace, 0,
     "Amount of presently outstanding async buffer io");
-static int bufspace;
-SYSCTL_INT(_vfs, OID_AUTO, bufspace, CTLFLAG_RD, &bufspace, 0,
+static long bufspace;
+SYSCTL_LONG(_vfs, OID_AUTO, bufspace, CTLFLAG_RD, &bufspace, 0,
     "KVA memory used for bufs");
-static int maxbufspace;
-SYSCTL_INT(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RD, &maxbufspace, 0,
+static long maxbufspace;
+SYSCTL_LONG(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RD, &maxbufspace, 0,
     "Maximum allowed value of bufspace (including buf_daemon)");
-static int bufmallocspace;
-SYSCTL_INT(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0,
+static long bufmallocspace;
+SYSCTL_LONG(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0,
     "Amount of malloced memory for buffers");
-static int maxbufmallocspace;
-SYSCTL_INT(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW, &maxbufmallocspace, 0,
+static long maxbufmallocspace;
+SYSCTL_LONG(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW, &maxbufmallocspace, 0,
     "Maximum amount of malloced memory for buffers");
-static int lobufspace;
-SYSCTL_INT(_vfs, OID_AUTO, lobufspace, CTLFLAG_RD, &lobufspace, 0,
+static long lobufspace;
+SYSCTL_LONG(_vfs, OID_AUTO, lobufspace, CTLFLAG_RD, &lobufspace, 0,
     "Minimum amount of buffers we want to have");
-int hibufspace;
-SYSCTL_INT(_vfs, OID_AUTO, hibufspace, CTLFLAG_RD, &hibufspace, 0,
+long hibufspace;
+SYSCTL_LONG(_vfs, OID_AUTO, hibufspace, CTLFLAG_RD, &hibufspace, 0,
     "Maximum allowed value of bufspace (excluding buf_daemon)");
 static int bufreusecnt;
 SYSCTL_INT(_vfs, OID_AUTO, bufreusecnt, CTLFLAG_RW, &bufreusecnt, 0,
@@ -142,11 +142,11 @@ SYSCTL_INT(_vfs, OID_AUTO, buffreekvacnt, CTLFLAG_RW, &buffreekvacnt, 0,
 static int bufdefragcnt;
 SYSCTL_INT(_vfs, OID_AUTO, bufdefragcnt, CTLFLAG_RW, &bufdefragcnt, 0,
     "Number of times we have had to repeat buffer allocation to defragment");
-static int lorunningspace;
-SYSCTL_INT(_vfs, OID_AUTO, lorunningspace, CTLFLAG_RW, &lorunningspace, 0,
+static long lorunningspace;
+SYSCTL_LONG(_vfs, OID_AUTO, lorunningspace, CTLFLAG_RW, &lorunningspace, 0,
     "Minimum preferred space used for in-progress I/O");
-static int hirunningspace;
-SYSCTL_INT(_vfs, OID_AUTO, hirunningspace, CTLFLAG_RW, &hirunningspace, 0,
+static long hirunningspace;
+SYSCTL_LONG(_vfs, OID_AUTO, hirunningspace, CTLFLAG_RW, &hirunningspace, 0,
     "Maximum amount of space to use for in-progress I/O");
 int dirtybufferflushes;
 SYSCTL_INT(_vfs, OID_AUTO, dirtybufferflushes, CTLFLAG_RW, &dirtybufferflushes,
@@ -324,7 +324,7 @@ runningbufwakeup(struct buf *bp)
 {
 
         if (bp->b_runningbufspace) {
-                atomic_subtract_int(&runningbufspace, bp->b_runningbufspace);
+                atomic_subtract_long(&runningbufspace, bp->b_runningbufspace);
                 bp->b_runningbufspace = 0;
                 mtx_lock(&rbreqlock);
                 if (runningbufreq && runningbufspace <= lorunningspace) {
@@ -444,7 +444,8 @@ bd_speedup(void)
 caddr_t
 kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est)
 {
-        int maxbuf;
+        int tuned_nbuf;
+        long maxbuf;
 
         /*
          * physmem_est is in pages.  Convert it to kilobytes (assumes
@@ -474,11 +475,17 @@ kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est)
                 if (maxbcache && nbuf > maxbcache / BKVASIZE)
                         nbuf = maxbcache / BKVASIZE;
-        }
+                tuned_nbuf = 1;
+        } else
+                tuned_nbuf = 0;
 
-        /* XXX Avoid integer overflows later on with maxbufspace. */
-        maxbuf = (INT_MAX / 3) / BKVASIZE;
-        if (nbuf > maxbuf)
-                nbuf = maxbuf;
+        /* XXX Avoid unsigned long overflows later on with maxbufspace. */
+        maxbuf = (LONG_MAX / 3) / BKVASIZE;
+        if (nbuf > maxbuf) {
+                if (!tuned_nbuf)
+                        printf("Warning: nbufs lowered from %d to %ld\n", nbuf,
+                            maxbuf);
+                nbuf = maxbuf;
+        }
 
         /*
@@ -548,8 +555,8 @@ bufinit(void)
          * this may result in KVM fragmentation which is not handled optimally
          * by the system.
          */
-        maxbufspace = nbuf * BKVASIZE;
-        hibufspace = imax(3 * maxbufspace / 4, maxbufspace - MAXBSIZE * 10);
+        maxbufspace = (long)nbuf * BKVASIZE;
+        hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - MAXBSIZE * 10);
         lobufspace = hibufspace - MAXBSIZE;
 
         lorunningspace = 512 * 1024;
@@ -577,7 +584,7 @@ bufinit(void)
          * be met.  We try to size hidirtybuffers to 3/4 our buffer space assuming
          * BKVASIZE'd (8K) buffers.
          */
-        while (hidirtybuffers * BKVASIZE > 3 * hibufspace / 4) {
+        while ((long)hidirtybuffers * BKVASIZE > 3 * hibufspace / 4) {
                 hidirtybuffers >>= 1;
         }
         lodirtybuffers = hidirtybuffers / 2;
@@ -613,7 +620,7 @@ bfreekva(struct buf *bp)
         if (bp->b_kvasize) {
                 atomic_add_int(&buffreekvacnt, 1);
-                atomic_subtract_int(&bufspace, bp->b_kvasize);
+                atomic_subtract_long(&bufspace, bp->b_kvasize);
                 vm_map_remove(buffer_map, (vm_offset_t) bp->b_kvabase,
                     (vm_offset_t) bp->b_kvabase + bp->b_kvasize);
                 bp->b_kvasize = 0;
@@ -837,7 +844,7 @@ bufwrite(struct buf *bp)
          * Normal bwrites pipeline writes
          */
         bp->b_runningbufspace = bp->b_bufsize;
-        atomic_add_int(&runningbufspace, bp->b_runningbufspace);
+        atomic_add_long(&runningbufspace, bp->b_runningbufspace);
 
         if (!TD_IS_IDLETHREAD(curthread))
                 curthread->td_ru.ru_oublock++;
@@ -1983,7 +1990,7 @@ getnewbuf(int slpflag, int slptimeo, int size, int maxsize)
                         bp->b_kvabase = (caddr_t) addr;
                         bp->b_kvasize = maxsize;
-                        atomic_add_int(&bufspace, bp->b_kvasize);
+                        atomic_add_long(&bufspace, bp->b_kvasize);
                         atomic_add_int(&bufreusecnt, 1);
                 }
                 vm_map_unlock(buffer_map);
@@ -2707,7 +2714,7 @@ allocbuf(struct buf *bp, int size)
                 } else {
                         free(bp->b_data, M_BIOBUF);
                         if (bp->b_bufsize) {
-                                atomic_subtract_int(
+                                atomic_subtract_long(
                                     &bufmallocspace,
                                     bp->b_bufsize);
                                 bufspacewakeup();
@@ -2744,7 +2751,7 @@ allocbuf(struct buf *bp, int size)
                         bp->b_bufsize = mbsize;
                         bp->b_bcount = size;
                         bp->b_flags |= B_MALLOC;
-                        atomic_add_int(&bufmallocspace, mbsize);
+                        atomic_add_long(&bufmallocspace, mbsize);
                         return 1;
                 }
                 origbuf = NULL;
@@ -2758,7 +2765,7 @@ allocbuf(struct buf *bp, int size)
                         origbufsize = bp->b_bufsize;
                         bp->b_data = bp->b_kvabase;
                         if (bp->b_bufsize) {
-                                atomic_subtract_int(&bufmallocspace,
+                                atomic_subtract_long(&bufmallocspace,
                                     bp->b_bufsize);
                                 bufspacewakeup();
                                 bp->b_bufsize = 0;

--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h

@@ -446,10 +446,10 @@ buf_countdeps(struct buf *bp, int i)
 #ifdef _KERNEL
 
 extern int      nbuf;                   /* The number of buffer headers */
-extern int      maxswzone;              /* Max KVA for swap structures */
-extern int      maxbcache;              /* Max KVA for buffer cache */
-extern int      runningbufspace;
-extern int      hibufspace;
+extern long     maxswzone;              /* Max KVA for swap structures */
+extern long     maxbcache;              /* Max KVA for buffer cache */
+extern long     runningbufspace;
+extern long     hibufspace;
 extern int      dirtybufthresh;
 extern int      bdwriteskip;
 extern int      dirtybufferflushes;

--- a/sys/sys/pipe.h
+++ b/sys/sys/pipe.h

@@ -56,7 +56,7 @@
 /*
  * See sys_pipe.c for info on what these limits mean.
  */
-extern int      maxpipekva;
+extern u_long   maxpipekva;
 
 /*
  * Pipe buffer information.

--- a/sys/ufs/ffs/ffs_snapshot.c
+++ b/sys/ufs/ffs/ffs_snapshot.c

@@ -2229,7 +2229,7 @@ ffs_copyonwrite(devvp, bp)
                         VI_UNLOCK(devvp);
                         if (saved_runningbufspace != 0) {
                                 bp->b_runningbufspace = saved_runningbufspace;
-                                atomic_add_int(&runningbufspace,
+                                atomic_add_long(&runningbufspace,
                                     bp->b_runningbufspace);
                         }
                         return (0);             /* Snapshot gone */
@@ -2354,7 +2354,7 @@
          */
         if (saved_runningbufspace != 0) {
                 bp->b_runningbufspace = saved_runningbufspace;
-                atomic_add_int(&runningbufspace, bp->b_runningbufspace);
+                atomic_add_long(&runningbufspace, bp->b_runningbufspace);
         }
         return (error);
 }

--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c

@@ -1915,7 +1915,7 @@ ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
                         }
                 }
                 bp->b_runningbufspace = bp->b_bufsize;
-                atomic_add_int(&runningbufspace,
+                atomic_add_long(&runningbufspace,
                     bp->b_runningbufspace);
         } else {
                 error = ffs_copyonwrite(vp, bp);

--- a/sys/vm/vm_init.c
+++ b/sys/vm/vm_init.c

@@ -186,12 +186,12 @@ vm_ksubmap_init(struct kva_md_info *kmi)
                 panic("startup: table size inconsistency");
 
         clean_map = kmem_suballoc(kernel_map, &kmi->clean_sva, &kmi->clean_eva,
-            nbuf * BKVASIZE + nswbuf * MAXPHYS, FALSE);
+            (long)nbuf * BKVASIZE + (long)nswbuf * MAXPHYS, FALSE);
         buffer_map = kmem_suballoc(clean_map, &kmi->buffer_sva,
-            &kmi->buffer_eva, nbuf * BKVASIZE, FALSE);
+            &kmi->buffer_eva, (long)nbuf * BKVASIZE, FALSE);
         buffer_map->system_map = 1;
         pager_map = kmem_suballoc(clean_map, &kmi->pager_sva, &kmi->pager_eva,
-            nswbuf * MAXPHYS, FALSE);
+            (long)nswbuf * MAXPHYS, FALSE);
         pager_map->system_map = 1;
         exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
             exec_map_entries * (ARG_MAX + (PAGE_SIZE * 3)), FALSE);

--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c

@@ -525,7 +525,7 @@ vnode_pager_input_smlfs(object, m)
                 bp->b_bcount = bsize;
                 bp->b_bufsize = bsize;
                 bp->b_runningbufspace = bp->b_bufsize;
-                atomic_add_int(&runningbufspace, bp->b_runningbufspace);
+                atomic_add_long(&runningbufspace, bp->b_runningbufspace);
 
                 /* do the input */
                 bp->b_iooffset = dbtob(bp->b_blkno);
@@ -905,7 +905,7 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage)
         bp->b_bcount = size;
         bp->b_bufsize = size;
         bp->b_runningbufspace = bp->b_bufsize;
-        atomic_add_int(&runningbufspace, bp->b_runningbufspace);
+        atomic_add_long(&runningbufspace, bp->b_runningbufspace);
 
         PCPU_INC(cnt.v_vnodein);
         PCPU_ADD(cnt.v_vnodepgsin, count);