24b6d11c34
a scalable concurrent allocator implementation. Reviewed by: current@ Approved by: phk, markm (mentor)
444 lines
14 KiB
Groff
444 lines
14 KiB
Groff
.\" Copyright (c) 1980, 1991, 1993
|
|
.\" The Regents of the University of California. All rights reserved.
|
|
.\"
|
|
.\" This code is derived from software contributed to Berkeley by
|
|
.\" the American National Standards Committee X3, on Information
|
|
.\" Processing Systems.
|
|
.\"
|
|
.\" Redistribution and use in source and binary forms, with or without
|
|
.\" modification, are permitted provided that the following conditions
|
|
.\" are met:
|
|
.\" 1. Redistributions of source code must retain the above copyright
|
|
.\" notice, this list of conditions and the following disclaimer.
|
|
.\" 2. Redistributions in binary form must reproduce the above copyright
|
|
.\" notice, this list of conditions and the following disclaimer in the
|
|
.\" documentation and/or other materials provided with the distribution.
|
|
.\" 3. Neither the name of the University nor the names of its contributors
|
|
.\" may be used to endorse or promote products derived from this software
|
|
.\" without specific prior written permission.
|
|
.\"
|
|
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
.\" SUCH DAMAGE.
|
|
.\"
|
|
.\" @(#)malloc.3 8.1 (Berkeley) 6/4/93
|
|
.\" $FreeBSD$
|
|
.\"
|
|
.Dd January 12, 2006
|
|
.Dt MALLOC 3
|
|
.Os
|
|
.Sh NAME
|
|
.Nm malloc , calloc , realloc , free , reallocf
|
|
.Nd general purpose memory allocation functions
|
|
.Sh LIBRARY
|
|
.Lb libc
|
|
.Sh SYNOPSIS
|
|
.In stdlib.h
|
|
.Ft void *
|
|
.Fn malloc "size_t size"
|
|
.Ft void *
|
|
.Fn calloc "size_t number" "size_t size"
|
|
.Ft void *
|
|
.Fn realloc "void *ptr" "size_t size"
|
|
.Ft void *
|
|
.Fn reallocf "void *ptr" "size_t size"
|
|
.Ft void
|
|
.Fn free "void *ptr"
|
|
.Ft const char *
|
|
.Va _malloc_options ;
|
|
.Ft void
|
|
.Fo \*(lp*_malloc_message\*(rp
|
|
.Fa "const char *p1" "const char *p2" "const char *p3" "const char *p4"
|
|
.Fc
|
|
.Sh DESCRIPTION
|
|
The
|
|
.Fn malloc
|
|
function allocates
|
|
.Fa size
|
|
bytes of uninitialized memory.
|
|
The allocated space is suitably aligned (after possible pointer coercion)
|
|
for storage of any type of object.
|
|
.Pp
|
|
The
|
|
.Fn calloc
|
|
function allocates space for
|
|
.Fa number
|
|
objects,
|
|
each
|
|
.Fa size
|
|
bytes in length.
|
|
The result is identical to calling
|
|
.Fn malloc
|
|
with an argument of
|
|
.Dq "number * size" ,
|
|
with the exception that the allocated memory is explicitly initialized
|
|
to zero bytes.
|
|
.Pp
|
|
The
|
|
.Fn realloc
|
|
function changes the size of the previously allocated memory referenced by
|
|
.Fa ptr
|
|
to
|
|
.Fa size
|
|
bytes.
|
|
The contents of the memory are unchanged up to the lesser of the new and
|
|
old sizes.
|
|
If the new size is larger,
|
|
the value of the newly allocated portion of the memory is undefined.
|
|
Upon success, the memory referenced by
|
|
.Fa ptr
|
|
is freed and a pointer to the newly allocated memory is returned.
|
|
Note that
|
|
.Fn realloc
|
|
and
|
|
.Fn reallocf
|
|
may move the memory allocation, resulting in a different return value than
|
|
.Fa ptr .
|
|
If
|
|
.Fa ptr
|
|
is
|
|
.Dv NULL ,
|
|
the
|
|
.Fn realloc
|
|
function behaves identically to
|
|
.Fn malloc
|
|
for the specified size.
|
|
.Pp
|
|
The
|
|
.Fn reallocf
|
|
function is identical to the
|
|
.Fn realloc
|
|
function, except that it
|
|
will free the passed pointer when the requested memory cannot be allocated.
|
|
This is a
|
|
.Fx
|
|
specific API designed to ease the problems with traditional coding styles
|
|
for realloc causing memory leaks in libraries.
|
|
.Pp
|
|
The
|
|
.Fn free
|
|
function causes the allocated memory referenced by
|
|
.Fa ptr
|
|
to be made available for future allocations.
|
|
If
|
|
.Fa ptr
|
|
is
|
|
.Dv NULL ,
|
|
no action occurs.
|
|
.Sh TUNING
|
|
Once, when the first call is made to one of these memory allocation
|
|
routines, various flags will be set or reset, which affect the
|
|
workings of this allocation implementation.
|
|
.Pp
|
|
The ``name'' of the file referenced by the symbolic link named
|
|
.Pa /etc/malloc.conf ,
|
|
the value of the environment variable
|
|
.Ev MALLOC_OPTIONS ,
|
|
and the string pointed to by the global variable
|
|
.Va _malloc_options
|
|
will be interpreted, in that order, character by character as flags.
|
|
.Pp
|
|
Most flags are single letters,
|
|
where uppercase indicates that the behavior is set, or on,
|
|
and lowercase means that the behavior is not set, or off.
|
|
.Bl -tag -width indent
|
|
.It A
|
|
All warnings (except for the warning about unknown
|
|
flags being set) become fatal.
|
|
The process will call
|
|
.Xr abort 3
|
|
in these cases.
|
|
.It C
|
|
Increase/decrease the size of the cache by a factor of two.
|
|
The default cache size is 256 objects for each arena.
|
|
This option can be specified multiple times.
|
|
.It J
|
|
Each byte of new memory allocated by
|
|
.Fn malloc ,
|
|
.Fn realloc
|
|
or
|
|
.Fn reallocf
|
|
will be initialized to 0xa5.
|
|
All memory freed by
|
|
.Fn free ,
|
|
.Fn realloc
|
|
or
|
|
.Fn reallocf
|
|
will be initialized to 0x5a.
|
|
This is intended for debugging and will impact performance negatively.
|
|
.It K
|
|
Increase/decrease the virtual memory chunk size by a factor of two.
|
|
The default chunk size is 16 MB.
|
|
This option can be specified multiple times.
|
|
.It N
|
|
Increase/decrease the number of arenas by a factor of two.
|
|
The default number of arenas is twice the number of CPUs, or one if there is a
|
|
single CPU.
|
|
This option can be specified multiple times.
|
|
.It P
|
|
Various statistics are printed at program exit via an
|
|
.Xr atexit 3
|
|
function.
|
|
This has the potential to cause deadlock for a multi-threaded process that exits
|
|
while one or more threads are executing in the memory allocation functions.
|
|
Therefore, this option should only be used with care; it is primarily intended
|
|
as a performance tuning aid during application development.
|
|
.It Q
|
|
Increase/decrease the size of the allocation quantum by a factor of two.
|
|
The default quantum is the minimum allowed by the architecture (typically 8 or
|
|
16 bytes).
|
|
This option can be specified multiple times.
|
|
.It U
|
|
Generate
|
|
.Dq utrace
|
|
entries for
|
|
.Xr ktrace 1 ,
|
|
for all operations.
|
|
Consult the source for details on this option.
|
|
.It V
|
|
Attempting to allocate zero bytes will return a
|
|
.Dv NULL
|
|
pointer instead of
|
|
a valid pointer.
|
|
(The default behavior is to make a minimal allocation and return a
|
|
pointer to it.)
|
|
This option is provided for System V compatibility.
|
|
This option is incompatible with the
|
|
.Dq X
|
|
option.
|
|
.It X
|
|
Rather than return failure for any allocation function,
|
|
display a diagnostic message on
|
|
.Dv stderr
|
|
and cause the program to drop
|
|
core (using
|
|
.Xr abort 3 ) .
|
|
This option should be set at compile time by including the following in
|
|
the source code:
|
|
.Bd -literal -offset indent
|
|
_malloc_options = "X";
|
|
.Ed
|
|
.It Z
|
|
Each byte of new memory allocated by
|
|
.Fn malloc ,
|
|
.Fn realloc
|
|
or
|
|
.Fn reallocf
|
|
will be initialized to 0x0.
|
|
Note that this initialization only happens once for each byte, so
|
|
.Fn realloc
|
|
and
|
|
.Fn reallocf
|
|
calls do not zero memory that was previously allocated.
|
|
This is intended for debugging and will impact performance negatively.
|
|
.El
|
|
.Pp
|
|
The
|
|
.Dq J
|
|
and
|
|
.Dq Z
|
|
options are intended for testing and debugging.
|
|
An application which changes its behavior when these options are used
|
|
is flawed.
|
|
.Sh RETURN VALUES
|
|
The
|
|
.Fn malloc
|
|
and
|
|
.Fn calloc
|
|
functions return a pointer to the allocated memory if successful; otherwise
|
|
a
|
|
.Dv NULL
|
|
pointer is returned and
|
|
.Va errno
|
|
is set to
|
|
.Er ENOMEM .
|
|
.Pp
|
|
The
|
|
.Fn realloc
|
|
and
|
|
.Fn reallocf
|
|
functions return a pointer, possibly identical to
|
|
.Fa ptr ,
|
|
to the allocated memory
|
|
if successful; otherwise a
|
|
.Dv NULL
|
|
pointer is returned, and
|
|
.Va errno
|
|
is set to
|
|
.Er ENOMEM
|
|
if the error was the result of an allocation failure.
|
|
The
|
|
.Fn realloc
|
|
function always leaves the original buffer intact
|
|
when an error occurs, whereas
|
|
.Fn reallocf
|
|
deallocates it in this case.
|
|
.Pp
|
|
The
|
|
.Fn free
|
|
function returns no value.
|
|
.Sh IMPLEMENTATION NOTES
|
|
This allocator uses multiple arenas in order to reduce lock contention for
|
|
threaded programs on multi-processor systems.
|
|
This works well with regard to threading scalability, but incurs some costs.
|
|
There is a small fixed per-arena overhead, and additionally, arenas manage
|
|
memory completely independently of each other, which means a small fixed
|
|
increase in overall memory fragmentation.
|
|
These overheads are not generally an issue, given the number of arenas normally
|
|
used.
|
|
Note that using substantially more arenas than the default is not likely to
|
|
improve performance, mainly due to reduced cache performance.
|
|
However, it may make sense to reduce the number of arenas if an application
|
|
does not make much use of the allocation functions.
|
|
.Pp
|
|
This allocator uses a novel approach to object caching.
|
|
For objects below a size threshold (use the
|
|
.Dq P
|
|
option to discover the threshold), full deallocation and attempted coalescence
|
|
with adjacent memory regions are delayed.
|
|
This is so that if the application requests an allocation of that size soon
|
|
thereafter, the request can be met much more quickly.
|
|
Most applications heavily use a small number of object sizes, so this caching
|
|
has the potential to have a large positive performance impact.
|
|
However, the effectiveness of the cache depends on the cache being large enough
|
|
to absorb typical fluctuations in the number of allocated objects.
|
|
If an application routinely fluctuates by thousands of objects, then it may
|
|
make sense to increase the size of the cache.
|
|
Conversely, if an application's memory usage fluctuates very little, it may
|
|
make sense to reduce the size of the cache, so that unused regions can be
|
|
coalesced sooner.
|
|
.Pp
|
|
This allocator is very aggressive about tightly packing objects in memory, even
|
|
for objects much larger than the system page size.
|
|
For programs that allocate objects larger than half the system page size, this
|
|
has the potential to reduce memory footprint in comparison to other allocators.
|
|
However, it has some side effects that are important to keep in mind.
|
|
First, even multi-page objects can start at non-page-aligned addresses, since
|
|
the implementation only guarantees quantum alignment.
|
|
Second, this tight packing of objects can cause objects to share L1 cache
|
|
lines, which can be a performance issue for multi-threaded applications.
|
|
There are two ways to approach these issues.
|
|
First,
|
|
.Fn posix_memalign
|
|
provides the ability to align allocations as needed.
|
|
By aligning an allocation to at least the L1 cache line size, and padding the
|
|
allocation request by one cache line unit, the programmer can rest assured that
|
|
no cache line sharing will occur for the object.
|
|
Second, the
|
|
.Dq Q
|
|
option can be used to force all allocations to be aligned with the L1 cache
|
|
lines.
|
|
This approach should be used with care though, because although easy to
|
|
implement, it means that all allocations must be at least as large as the
|
|
quantum, which can cause severe internal fragmentation if the application
|
|
allocates many small objects.
|
|
.Sh DEBUGGING MALLOC PROBLEMS
|
|
The first thing to do is to set the
|
|
.Dq A
|
|
option.
|
|
This option forces a core dump (if possible) at the first sign of trouble,
|
|
rather than the normal policy of trying to continue if at all possible.
|
|
.Pp
|
|
It is probably also a good idea to recompile the program with suitable
|
|
options and symbols for debugger support.
|
|
.Pp
|
|
If the program starts to give unusual results, dump core or generally behave
|
|
differently without emitting any of the messages mentioned in the next
|
|
section, it is likely because it depends on the storage being filled with
|
|
zero bytes.
|
|
Try running it with the
|
|
.Dq Z
|
|
option set;
|
|
if that improves the situation, this diagnosis has been confirmed.
|
|
If the program still misbehaves,
|
|
the likely problem is accessing memory outside the allocated area.
|
|
.Pp
|
|
Alternatively, if the symptoms are not easy to reproduce, setting the
|
|
.Dq J
|
|
option may help provoke the problem.
|
|
.Pp
|
|
In truly difficult cases, the
|
|
.Dq U
|
|
option, if supported by the kernel, can provide a detailed trace of
|
|
all calls made to these functions.
|
|
.Pp
|
|
Unfortunately this implementation does not provide much detail about
|
|
the problems it detects; the performance impact for storing such information
|
|
would be prohibitive.
|
|
There are a number of allocation implementations available on the Internet
|
|
which focus on detecting and pinpointing problems by trading performance for
|
|
extra sanity checks and detailed diagnostics.
|
|
.Sh DIAGNOSTIC MESSAGES
|
|
If any of the memory allocation/deallocation functions detect an error or
|
|
warning condition, a message will be printed to file descriptor
.Dv STDERR_FILENO .
|
|
Errors will result in the process dumping core.
|
|
If the
|
|
.Dq A
|
|
option is set, all warnings are treated as errors.
|
|
.Pp
|
|
The
|
|
.Va _malloc_message
|
|
variable allows the programmer to override the function which emits
|
|
the text strings forming the errors and warnings if for some reason
|
|
the
|
|
.Dv STDERR_FILENO
|
|
file descriptor is not suitable for this.
|
|
Please note that doing anything which tries to allocate memory in
|
|
this function is likely to result in a crash or deadlock.
|
|
.Pp
|
|
All messages are prefixed by:
|
|
.Bl -diag
|
|
.It <progname>: (malloc)
|
|
.El
|
|
.Sh ENVIRONMENT
|
|
The following environment variables affect the execution of the allocation
|
|
functions:
|
|
.Bl -tag -width ".Ev MALLOC_OPTIONS"
|
|
.It Ev MALLOC_OPTIONS
|
|
If the environment variable
|
|
.Ev MALLOC_OPTIONS
|
|
is set, the characters it contains will be interpreted as flags to the
|
|
allocation functions.
|
|
.El
|
|
.Sh EXAMPLES
|
|
To dump core whenever a problem occurs:
|
|
.Pp
|
|
.Bd -literal -offset indent
|
|
ln -s 'A' /etc/malloc.conf
|
|
.Ed
|
|
.Pp
|
|
To specify in the source that a program does no return value checking
|
|
on calls to these functions:
|
|
.Bd -literal -offset indent
|
|
_malloc_options = "X";
|
|
.Ed
|
|
.Sh SEE ALSO
|
|
.Xr mmap 2 ,
|
|
.Xr alloca 3 ,
|
|
.Xr atexit 3 ,
|
|
.Xr getpagesize 3 ,
|
|
.Xr memory 3 ,
|
|
.Xr posix_memalign 3
|
|
.Sh STANDARDS
|
|
The
|
|
.Fn malloc ,
|
|
.Fn calloc ,
|
|
.Fn realloc
|
|
and
|
|
.Fn free
|
|
functions conform to
|
|
.St -isoC .
|
|
.Sh HISTORY
|
|
The
|
|
.Fn reallocf
|
|
function first appeared in
|
|
.Fx 3.0 .
|