freebsd-skq/sys/kern/kern_malloc.c
dillon 7a2987cf94 Fix null-pointer dereference crash when the system is intentionally
run out of KVM through a mmap()/fork() bomb that allocates hundreds
    of thousands of vm_map_entry structures.

    Add panic to make null-pointer dereference crash a little more verbose.

    Add a new sysctl, vm.max_proc_mmap, which specifies the maximum number
    of mmap()'d spaces (discrete vm_map_entry's in the process).  The value
    defaults to around 9000 for a 128MB machine.  The test is scaled for the
    number of processes sharing a vmspace (aka linux threads).  Setting
    the value to 0 disables the feature.

PR: kern/16573
Approved by: jkh
2000-02-16 21:11:33 +00:00

533 lines
14 KiB
C

/*
* Copyright (c) 1987, 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)kern_malloc.c 8.3 (Berkeley) 1/4/94
* $FreeBSD$
*/
#include "opt_vm.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/vmmeter.h>
#include <sys/lock.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#if defined(INVARIANTS) && defined(__i386__)
#include <machine/cpu.h>
#endif
/*
* Malloc types defined by this file.  The two string arguments are the
* short name and the longer description of each type.
*/
MALLOC_DEFINE(M_CACHE, "cache", "Various Dynamically allocated caches");
MALLOC_DEFINE(M_DEVBUF, "devbuf", "device driver memory");
MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers");
MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options");
MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery");
/*
* kmeminit() is registered through SYSINIT under SI_SUB_KMEM /
* SI_ORDER_FIRST; it is never called directly from this file.
*/
static void kmeminit __P((void *));
SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, kmeminit, NULL)
/*
* Private type that malloc() stamps (under INVARIANTS) into the type
* field of freshly carved free-list elements.
*/
static MALLOC_DEFINE(M_FREE, "free", "should be on free list");
/*
* Head of the list of initialized malloc types, linked through ks_next.
* malloc_init() pushes onto it and malloc_uninit() unlinks from it.
*/
static struct malloc_type *kmemstatistics;
/*
* Allocation buckets.  malloc() selects one with BUCKETINDX(size) and
* free() with the ku_indx recorded in the block's kmemusage entry.
*/
static struct kmembuckets bucket[MINBUCKET + 16];
/* Usage records, allocated by kmeminit() and looked up with btokup(). */
static struct kmemusage *kmemusage;
/*
* Bounds of the kmem submap as filled in by kmem_suballoc() in
* kmeminit(); free() asserts that kmembase <= addr < kmemlimit.
*/
static char *kmembase;
static char *kmemlimit;
/*
* Size chosen for the malloc portion of the kmem map, set in kmeminit().
* malloc_init() uses half of it as the default per-type limit.
*/
u_int vm_kmem_size;
#ifdef INVARIANTS
/*
* This structure provides a set of masks to catch unaligned frees.
* Entry i (for i >= 1) is 2^i - 1.  free() indexes it with the bucket
* index of the block (or with BUCKETINDX(PAGE_SIZE) for blocks larger
* than a page) and panics if any masked address bit is set.
*/
static long addrmask[] = { 0,
0x00000001, 0x00000003, 0x00000007, 0x0000000f,
0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff,
0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff,
};
/*
* The WEIRD_ADDR is used as known text to copy into free objects so
* that modifications after frees can be detected.  At most MAX_COPY
* bytes at the start of each object are filled and later checked.
*/
#define WEIRD_ADDR 0xdeadc0de
#define MAX_COPY 64
/*
* Normally the first word of the structure is used to hold the list
* pointer for free objects. However, when running with diagnostics,
* the list pointer is kept in the fourth field (and the owning type in
* the second), so as to catch modifications in the most commonly
* trashed leading words.
*
* spare0 - left holding WEIRD_ADDR while the object is free; free()
*          tests it as the quick "looks already free" check.
* type   - malloc type recorded when the object was put on the list.
* spare1 - not referenced by name in this file.
* next   - next free object in the bucket, or NULL.
*/
struct freelist {
long spare0;
struct malloc_type *type;
long spare1;
caddr_t next;
};
#else /* !INVARIANTS */
/*
* Without diagnostics a free object only carries the link to the next
* free object in its bucket.
*/
struct freelist {
caddr_t next;
};
#endif /* INVARIANTS */
/*
 *	malloc:
 *
 *	Allocate a block of memory of at least `size' bytes and charge it
 *	to the malloc type `type'.  Requests of up to MAXALLOCSAVE bytes
 *	are served from the power-of-two bucket selected by
 *	BUCKETINDX(size); larger requests are rounded up to whole pages
 *	and taken straight from kmem_map.
 *
 *	If M_NOWAIT is set, this routine will not block and return NULL if
 *	the allocation fails.
 *
 *	If M_ASLEEP is set (M_NOWAIT must also be set), this routine
 *	will have the side effect of calling asleep() if it returns NULL,
 *	allowing the parent to await() at some future time.
 *
 *	Returns the address of the block, or NULL when the type is at its
 *	limit and M_NOWAIT was given, or when kmem_malloc() fails.
 *
 *	Panics on "allocation too large" (more than 65535 pages),
 *	"wrong bucket" and "lost data" (inconsistent usage records).
 */
void *
malloc(size, type, flags)
	unsigned long size;
	struct malloc_type *type;
	int flags;
{
	register struct kmembuckets *kbp;
	register struct kmemusage *kup;
	register struct freelist *freep;
	long indx, npg, allocsize;
	int s;
	caddr_t va, cp, savedlist;
#ifdef INVARIANTS
	long *end, *lp;
	int copysize;
	const char *savedtype;
#endif
	register struct malloc_type *ksp = type;

#if defined(INVARIANTS) && defined(__i386__)
	if (flags == M_WAITOK)
		KASSERT(intr_nesting_level == 0,
		   ("malloc(M_WAITOK) in interrupt context"));
#endif
	/*
	 * Must be at splmem() prior to initializing segment to handle
	 * potential initialization race.
	 */
	s = splmem();

	/* A zero limit marks a type that has not been registered yet. */
	if (type->ks_limit == 0)
		malloc_init(type);

	indx = BUCKETINDX(size);
	kbp = &bucket[indx];

	/*
	 * While the type is at or over its limit either give up
	 * (M_NOWAIT, optionally queueing an asleep() first) or sleep on
	 * the type until free() issues a wakeup.  ks_limblocks counts
	 * these events and saturates at 65535.
	 */
	while (ksp->ks_memuse >= ksp->ks_limit) {
		if (flags & M_ASLEEP) {
			if (ksp->ks_limblocks < 65535)
				ksp->ks_limblocks++;
			asleep((caddr_t)ksp, PSWP+2, type->ks_shortdesc, 0);
		}
		if (flags & M_NOWAIT) {
			splx(s);
			return ((void *) NULL);
		}
		if (ksp->ks_limblocks < 65535)
			ksp->ks_limblocks++;
		tsleep((caddr_t)ksp, PSWP+2, type->ks_shortdesc, 0);
	}

	/* Remember which bucket sizes this type has used. */
	ksp->ks_size |= 1 << indx;
#ifdef INVARIANTS
	/* Number of leading bytes that carry the fill pattern. */
	copysize = 1 << indx < MAX_COPY ? 1 << indx : MAX_COPY;
#endif
	if (kbp->kb_next == NULL) {
		/* Bucket is empty: get more memory from the kmem map. */
		kbp->kb_last = NULL;
		if (size > MAXALLOCSAVE)
			allocsize = roundup(size, PAGE_SIZE);
		else
			allocsize = 1 << indx;
		npg = btoc(allocsize);
		va = (caddr_t) kmem_malloc(kmem_map, (vm_size_t)ctob(npg), flags);
		if (va == NULL) {
			splx(s);
			return ((void *) NULL);
		}
		kbp->kb_total += kbp->kb_elmpercl;
		kup = btokup(va);
		kup->ku_indx = indx;
		if (allocsize > MAXALLOCSAVE) {
			/*
			 * Large allocation: record the page count so
			 * free() can return the pages, and hand the
			 * region out directly without using the list.
			 */
			if (npg > 65535)
				panic("malloc: allocation too large");
			kup->ku_pagecnt = npg;
			ksp->ks_memuse += allocsize;
			goto out;
		}
		kup->ku_freecnt = kbp->kb_elmpercl;
		kbp->kb_totalfree += kbp->kb_elmpercl;
		/*
		 * Just in case we blocked while allocating memory,
		 * and someone else also allocated memory for this
		 * bucket, don't assume the list is still empty.
		 */
		savedlist = kbp->kb_next;
		/*
		 * Carve the new memory into allocsize pieces and chain
		 * them from the highest address down to va; the piece
		 * at va ends up last and is linked to the saved list.
		 */
		kbp->kb_next = cp = va + (npg * PAGE_SIZE) - allocsize;
		for (;;) {
			freep = (struct freelist *)cp;
#ifdef INVARIANTS
			/*
			 * Copy in known text to detect modification
			 * after freeing.
			 */
			end = (long *)&cp[copysize];
			for (lp = (long *)cp; lp < end; lp++)
				*lp = WEIRD_ADDR;
			freep->type = M_FREE;
#endif /* INVARIANTS */
			if (cp <= va)
				break;
			cp -= allocsize;
			freep->next = cp;
		}
		freep->next = savedlist;
		if (kbp->kb_last == NULL)
			kbp->kb_last = (caddr_t)freep;
	}

	/* Take the first element off the bucket's free list. */
	va = kbp->kb_next;
	kbp->kb_next = ((struct freelist *)va)->next;
#ifdef INVARIANTS
	freep = (struct freelist *)va;
	/*
	 * Name of the type that last owned this element, for the report
	 * below.  It has to come from the type recorded in the element
	 * (stored by free(), or M_FREE for a freshly carved piece), not
	 * from the type now asking for memory, and it has to be read
	 * before the field is overwritten with the fill pattern.
	 */
	savedtype = (const char *) freep->type->ks_shortdesc;
	/*
	 * NOTE(review): the big-endian variant applies >> to a pointer
	 * typed expression (the cast binds tighter than the shift); it
	 * looks like it would not compile on such a build -- confirm.
	 */
#if BYTE_ORDER == BIG_ENDIAN
	freep->type = (struct malloc_type *)WEIRD_ADDR >> 16;
#endif
#if BYTE_ORDER == LITTLE_ENDIAN
	freep->type = (struct malloc_type *)WEIRD_ADDR;
#endif
	/*
	 * Restore the pattern over the fields used while the element
	 * was on the list, so that the scan below only trips on words
	 * changed by somebody else.
	 */
	if ((intptr_t)(void *)&freep->next & 0x2)
		freep->next = (caddr_t)((WEIRD_ADDR >> 16)|(WEIRD_ADDR << 16));
	else
		freep->next = (caddr_t)WEIRD_ADDR;
	end = (long *)&va[copysize];
	for (lp = (long *)va; lp < end; lp++) {
		if (*lp == WEIRD_ADDR)
			continue;
		/* Only the first damaged word is reported. */
		printf("%s %ld of object %p size %lu %s %s (0x%lx != 0x%lx)\n",
			"Data modified on freelist: word",
			(long)(lp - (long *)va), (void *)va, size,
			"previous type", savedtype, *lp, (u_long)WEIRD_ADDR);
		break;
	}
	/* Clear the marker free() uses for its "looks free" test. */
	freep->spare0 = 0;
#endif /* INVARIANTS */
	kup = btokup(va);
	if (kup->ku_indx != indx)
		panic("malloc: wrong bucket");
	if (kup->ku_freecnt == 0)
		panic("malloc: lost data");
	kup->ku_freecnt--;
	kbp->kb_totalfree--;
	ksp->ks_memuse += 1 << indx;
out:
	/* Statistics common to bucket and large allocations. */
	kbp->kb_calls++;
	ksp->ks_inuse++;
	ksp->ks_calls++;
	if (ksp->ks_memuse > ksp->ks_maxused)
		ksp->ks_maxused = ksp->ks_memuse;
	splx(s);
	return ((void *) va);
}
/*
* free:
*
* Free a block of memory allocated by malloc and credit the space
* back to the malloc type `type'.
*
* addr - start of the block.  It is KASSERT()ed to lie within
*        [kmembase, kmemlimit); NULL is not special-cased here.
* type - malloc type the block is charged to.  The routine panics
*        if the type's ks_limit is 0.
*
* This routine may not block.
*/
void
free(addr, type)
void *addr;
struct malloc_type *type;
{
register struct kmembuckets *kbp;
register struct kmemusage *kup;
register struct freelist *freep;
long size;
int s;
#ifdef INVARIANTS
struct freelist *fp;
long *end, *lp, alloc, copysize;
#endif
register struct malloc_type *ksp = type;
/* malloc_init() gives every registered type a non-zero limit. */
if (type->ks_limit == 0)
panic("freeing with unknown type (%s)", type->ks_shortdesc);
KASSERT(kmembase <= (char *)addr && (char *)addr < kmemlimit,
("free: address %p out of range", (void *)addr));
/*
* The usage record written by malloc() gives the bucket index, and
* from that the nominal size of the block.
*/
kup = btokup(addr);
size = 1 << kup->ku_indx;
kbp = &bucket[kup->ku_indx];
s = splmem();
#ifdef INVARIANTS
/*
* Check for returns of data that do not point to the
* beginning of the allocation.
*/
if (size > PAGE_SIZE)
alloc = addrmask[BUCKETINDX(PAGE_SIZE)];
else
alloc = addrmask[kup->ku_indx];
if (((uintptr_t)(void *)addr & alloc) != 0)
panic("free: unaligned addr %p, size %ld, type %s, mask %ld",
(void *)addr, size, type->ks_shortdesc, alloc);
#endif /* INVARIANTS */
if (size > MAXALLOCSAVE) {
/*
* Large block: give the pages recorded in ku_pagecnt back to
* kmem_map and clear the usage record.  The free list is not
* involved.
*/
kmem_free(kmem_map, (vm_offset_t)addr, ctob(kup->ku_pagecnt));
size = kup->ku_pagecnt << PAGE_SHIFT;
ksp->ks_memuse -= size;
kup->ku_indx = 0;
kup->ku_pagecnt = 0;
/*
* Wake sleepers in malloc() only when this release took the
* type from at-or-over its limit to below it.
*/
if (ksp->ks_memuse + size >= ksp->ks_limit &&
ksp->ks_memuse < ksp->ks_limit)
wakeup((caddr_t)ksp);
ksp->ks_inuse--;
kbp->kb_total -= 1;
splx(s);
return;
}
freep = (struct freelist *)addr;
#ifdef INVARIANTS
/*
* Check for multiple frees. Use a quick check to see if
* it looks free before laboriously searching the freelist.
*/
if (freep->spare0 == WEIRD_ADDR) {
fp = (struct freelist *)kbp->kb_next;
while (fp) {
if (fp->spare0 != WEIRD_ADDR)
panic("free: free item %p modified", fp);
else if (addr == (caddr_t)fp)
panic("free: multiple freed item %p", addr);
fp = (struct freelist *)fp->next;
}
}
/*
* Copy in known text to detect modification after freeing
* and to make it look free. Also, save the type being freed
* so we can list likely culprit if modification is detected
* when the object is reallocated.
*/
copysize = size < MAX_COPY ? size : MAX_COPY;
end = (long *)&((caddr_t)addr)[copysize];
for (lp = (long *)addr; lp < end; lp++)
*lp = WEIRD_ADDR;
freep->type = type;
#endif /* INVARIANTS */
/*
* ku_freecnt may reach, but never exceed, the number of elements
* per cluster.  When it reaches that number while the bucket holds
* more free elements than its high water mark, only a counter is
* bumped; nothing is released here.
*/
kup->ku_freecnt++;
if (kup->ku_freecnt >= kbp->kb_elmpercl) {
if (kup->ku_freecnt > kbp->kb_elmpercl)
panic("free: multiple frees");
else if (kbp->kb_totalfree > kbp->kb_highwat)
kbp->kb_couldfree++;
}
kbp->kb_totalfree++;
ksp->ks_memuse -= size;
/* Same limit-crossing wakeup test as for large blocks above. */
if (ksp->ks_memuse + size >= ksp->ks_limit &&
ksp->ks_memuse < ksp->ks_limit)
wakeup((caddr_t)ksp);
ksp->ks_inuse--;
#ifdef OLD_MALLOC_MEMORY_POLICY
/* Old policy: append the block at the tail of the bucket's list. */
if (kbp->kb_next == NULL)
kbp->kb_next = addr;
else
((struct freelist *)kbp->kb_last)->next = addr;
freep->next = NULL;
kbp->kb_last = addr;
#else
/*
* Return memory to the head of the queue for quick reuse. This
* can improve performance by improving the probability of the
* item being in the cache when it is reused.
*/
if (kbp->kb_next == NULL) {
kbp->kb_next = addr;
kbp->kb_last = addr;
freep->next = NULL;
} else {
freep->next = kbp->kb_next;
kbp->kb_next = addr;
}
#endif
splx(s);
}
/*
* Initialize the kernel memory allocator
*/
/* ARGSUSED*/
static void
kmeminit(dummy)
void *dummy;
{
register long indx;
u_long npg;
u_long mem_size;
u_long xvm_kmem_size;
#if ((MAXALLOCSAVE & (MAXALLOCSAVE - 1)) != 0)
#error "kmeminit: MAXALLOCSAVE not power of 2"
#endif
#if (MAXALLOCSAVE > MINALLOCSIZE * 32768)
#error "kmeminit: MAXALLOCSAVE too big"
#endif
#if (MAXALLOCSAVE < PAGE_SIZE)
#error "kmeminit: MAXALLOCSAVE too small"
#endif
/*
* Try to auto-tune the kernel memory size, so that it is
* more applicable for a wider range of machine sizes.
* On an X86, a VM_KMEM_SIZE_SCALE value of 4 is good, while
* a VM_KMEM_SIZE of 12MB is a fair compromise. The
* VM_KMEM_SIZE_MAX is dependent on the maximum KVA space
* available, and on an X86 with a total KVA space of 256MB,
* try to keep VM_KMEM_SIZE_MAX at 80MB or below.
*
* Note that the kmem_map is also used by the zone allocator,
* so make sure that there is enough space.
*/
xvm_kmem_size = VM_KMEM_SIZE;
mem_size = cnt.v_page_count * PAGE_SIZE;
#if defined(VM_KMEM_SIZE_SCALE)
if ((mem_size / VM_KMEM_SIZE_SCALE) > xvm_kmem_size)
xvm_kmem_size = mem_size / VM_KMEM_SIZE_SCALE;
#endif
#if defined(VM_KMEM_SIZE_MAX)
if (xvm_kmem_size >= VM_KMEM_SIZE_MAX)
xvm_kmem_size = VM_KMEM_SIZE_MAX;
#endif
/* Allow final override from the kernel environment */
TUNABLE_INT_FETCH("kern.vm.kmem.size", xvm_kmem_size, vm_kmem_size);
/*
* Limit kmem virtual size to twice the physical memory.
* This allows for kmem map sparseness, but limits the size
* to something sane. Be careful to not overflow the 32bit
* ints while doing the check.
*/
if ((vm_kmem_size / 2) > (cnt.v_page_count * PAGE_SIZE))
vm_kmem_size = 2 * cnt.v_page_count * PAGE_SIZE;
npg = (nmbufs * MSIZE + nmbclusters * MCLBYTES + vm_kmem_size)
/ PAGE_SIZE;
kmemusage = (struct kmemusage *) kmem_alloc(kernel_map,
(vm_size_t)(npg * sizeof(struct kmemusage)));
kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase,
(vm_offset_t *)&kmemlimit, (vm_size_t)(npg * PAGE_SIZE));
kmem_map->system_map = 1;
for (indx = 0; indx < MINBUCKET + 16; indx++) {
if (1 << indx >= PAGE_SIZE)
bucket[indx].kb_elmpercl = 1;
else
bucket[indx].kb_elmpercl = PAGE_SIZE / (1 << indx);
bucket[indx].kb_highwat = 5 * bucket[indx].kb_elmpercl;
}
}
/*
 * malloc_init:
 *
 *	Register a malloc type: give it its default limit and put it at
 *	the head of the kmemstatistics list.  `data' points to the
 *	struct malloc_type.  Calling this on a type whose limit is
 *	already set is a no-op.
 *
 *	Panics if the type lacks M_MAGIC, or if an unregistered type is
 *	seen before the VM page count is known.
 */
void
malloc_init(data)
	void *data;
{
	struct malloc_type *mtp;

	mtp = (struct malloc_type *)data;
	if (mtp->ks_magic != M_MAGIC)
		panic("malloc type lacks magic");

	/* A non-zero limit means the type has been registered before. */
	if (mtp->ks_limit == 0) {
		if (cnt.v_page_count == 0)
			panic("malloc_init not allowed before vm init");

		/*
		 * By default each malloc type may use up to half of the
		 * malloc portion of the kmem map.
		 */
		mtp->ks_limit = vm_kmem_size / 2;
		mtp->ks_next = kmemstatistics;
		kmemstatistics = mtp;
	}
}
/*
 * malloc_uninit:
 *
 *	Undo malloc_init(): unlink the malloc type that `data' points to
 *	from the kmemstatistics list and reset its limit to 0, which
 *	marks it as unregistered.
 *
 *	Panics if the type lacks M_MAGIC, if the VM page count is still
 *	zero, or if the type was never registered.
 */
void
malloc_uninit(data)
	void *data;
{
	struct malloc_type *mtp, *prev;

	mtp = (struct malloc_type *)data;
	if (mtp->ks_magic != M_MAGIC)
		panic("malloc type lacks magic");
	if (cnt.v_page_count == 0)
		panic("malloc_uninit not allowed before vm init");
	if (mtp->ks_limit == 0)
		panic("malloc_uninit on uninitialized type");

	if (kmemstatistics == mtp) {
		/* The type is the list head. */
		kmemstatistics = mtp->ks_next;
	} else {
		/* Look for the entry that precedes the type and skip it. */
		prev = kmemstatistics;
		while (prev->ks_next != NULL) {
			if (prev->ks_next == mtp) {
				prev->ks_next = mtp->ks_next;
				break;
			}
			prev = prev->ks_next;
		}
	}

	mtp->ks_limit = 0;
	mtp->ks_next = NULL;
}