2005-01-07 02:29:27 +00:00
|
|
|
/*-
|
2009-01-25 09:11:24 +00:00
|
|
|
* Copyright (c) 2002-2005, 2009 Jeffrey Roberson <jeff@FreeBSD.org>
|
2005-07-16 09:51:52 +00:00
|
|
|
* Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
|
2006-10-26 12:55:32 +00:00
|
|
|
* Copyright (c) 2004-2006 Robert N. M. Watson
|
2005-07-16 09:51:52 +00:00
|
|
|
* All rights reserved.
|
2002-03-19 09:11:49 +00:00
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice unmodified, this list of conditions, and the following
|
|
|
|
* disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
|
|
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
|
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
|
|
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
|
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
|
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
|
|
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* uma_core.c Implementation of the Universal Memory allocator
|
|
|
|
*
|
|
|
|
* This allocator is intended to replace the multitude of similar object caches
|
|
|
|
* in the standard FreeBSD kernel. The intent is to be flexible as well as
|
|
|
|
* effecient. A primary design goal is to return unused memory to the rest of
|
2004-01-30 16:26:29 +00:00
|
|
|
* the system. This will make the system as a whole more flexible due to the
|
2002-03-19 09:11:49 +00:00
|
|
|
* ability to move memory to subsystems which most need it instead of leaving
|
|
|
|
* pools of reserved memory unused.
|
|
|
|
*
|
|
|
|
* The basic ideas stem from similar slab/zone based allocators whose algorithms
|
|
|
|
* are well known.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* TODO:
|
|
|
|
* - Improve memory usage for large allocations
|
|
|
|
* - Investigate cache size adjustments
|
|
|
|
*/
|
|
|
|
|
2003-06-11 23:50:51 +00:00
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
/* I should really use ktr.. */
|
|
|
|
/*
|
|
|
|
#define UMA_DEBUG 1
|
|
|
|
#define UMA_DEBUG_ALLOC 1
|
|
|
|
#define UMA_DEBUG_ALLOC_1 1
|
|
|
|
*/
|
|
|
|
|
2005-10-20 16:39:33 +00:00
|
|
|
#include "opt_ddb.h"
|
2002-03-19 09:11:49 +00:00
|
|
|
#include "opt_param.h"
|
2011-10-12 18:08:28 +00:00
|
|
|
#include "opt_vm.h"
|
2005-10-20 16:39:33 +00:00
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/systm.h>
|
|
|
|
#include <sys/kernel.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/queue.h>
|
|
|
|
#include <sys/malloc.h>
|
2004-08-06 21:52:38 +00:00
|
|
|
#include <sys/ktr.h>
|
2002-03-19 09:11:49 +00:00
|
|
|
#include <sys/lock.h>
|
|
|
|
#include <sys/sysctl.h>
|
|
|
|
#include <sys/mutex.h>
|
2002-05-20 17:54:48 +00:00
|
|
|
#include <sys/proc.h>
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
#include <sys/sbuf.h>
|
2002-03-19 09:11:49 +00:00
|
|
|
#include <sys/smp.h>
|
2002-04-08 06:20:34 +00:00
|
|
|
#include <sys/vmmeter.h>
|
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
#include <vm/vm.h>
|
|
|
|
#include <vm/vm_object.h>
|
|
|
|
#include <vm/vm_page.h>
|
|
|
|
#include <vm/vm_param.h>
|
|
|
|
#include <vm/vm_map.h>
|
|
|
|
#include <vm/vm_kern.h>
|
|
|
|
#include <vm/vm_extern.h>
|
|
|
|
#include <vm/uma.h>
|
|
|
|
#include <vm/uma_int.h>
|
2002-05-02 02:08:48 +00:00
|
|
|
#include <vm/uma_dbg.h>
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2005-10-20 16:39:33 +00:00
|
|
|
#include <ddb/ddb.h>
|
|
|
|
|
2011-10-12 18:08:28 +00:00
|
|
|
#ifdef DEBUG_MEMGUARD
|
|
|
|
#include <vm/memguard.h>
|
|
|
|
#endif
|
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
/*
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
* This is the zone and keg from which all zones are spawned. The idea is that
|
|
|
|
* even the zone & keg heads are allocated from the allocator, so we use the
|
|
|
|
* bss section to bootstrap us.
|
2002-03-19 09:11:49 +00:00
|
|
|
*/
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
static struct uma_keg masterkeg;
|
|
|
|
static struct uma_zone masterzone_k;
|
|
|
|
static struct uma_zone masterzone_z;
|
|
|
|
static uma_zone_t kegs = &masterzone_k;
|
|
|
|
static uma_zone_t zones = &masterzone_z;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
/* This is the zone from which all of uma_slab_t's are allocated. */
|
|
|
|
static uma_zone_t slabzone;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
static uma_zone_t slabrefzone; /* With refcounters (for UMA_ZONE_REFCNT) */
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The initial hash tables come out of this zone so they can be allocated
|
|
|
|
* prior to malloc coming up.
|
|
|
|
*/
|
|
|
|
static uma_zone_t hashzone;
|
|
|
|
|
2007-02-11 20:13:52 +00:00
|
|
|
/* The boot-time adjusted value for cache line alignment. */
|
2011-03-21 09:40:01 +00:00
|
|
|
int uma_align_cache = 64 - 1;
|
2007-02-11 20:13:52 +00:00
|
|
|
|
2003-09-19 07:23:50 +00:00
|
|
|
static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
|
|
|
|
|
2002-04-08 06:20:34 +00:00
|
|
|
/*
|
|
|
|
* Are we allowed to allocate buckets?
|
|
|
|
*/
|
|
|
|
static int bucketdisable = 1;
|
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
/* Linked list of all kegs in the system */
|
2009-12-28 22:56:30 +00:00
|
|
|
static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
|
2002-03-19 09:11:49 +00:00
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
/* This mutex protects the keg list */
|
2002-03-19 09:11:49 +00:00
|
|
|
static struct mtx uma_mtx;
|
|
|
|
|
|
|
|
/* Linked list of boot time pages */
|
|
|
|
static LIST_HEAD(,uma_slab) uma_boot_pages =
|
2009-12-28 22:56:30 +00:00
|
|
|
LIST_HEAD_INITIALIZER(uma_boot_pages);
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2005-09-09 06:03:08 +00:00
|
|
|
/* This mutex protects the boot time pages list */
|
|
|
|
static struct mtx uma_boot_pages_mtx;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
/* Is the VM done starting up? */
|
|
|
|
static int booted = 0;
|
1. Prior to r214782, UMA did not support multipage allocations before
uma_startup2() was called. Thus, setting the variable "booted" to true in
uma_startup() was ok on machines with UMA_MD_SMALL_ALLOC defined, because
any allocations made after uma_startup() but before uma_startup2() could be
satisfied by uma_small_alloc(). Now, however, some multipage allocations
are necessary before uma_startup2() just to allocate zone structures on
machines with a large number of processors. Thus, a Boolean can no longer
effectively describe the state of the UMA allocator. Instead, make "booted"
have three values to describe how far initialization has progressed. This
allows multipage allocations to continue using startup_alloc() until
uma_startup2(), but single-page allocations may begin using
uma_small_alloc() after uma_startup().
2. With the aforementioned change, only a modest increase in boot pages is
necessary to boot UMA on a large number of processors.
3. Retire UMA_MD_SMALL_ALLOC_NEEDS_VM. It has only been used between
r182028 and r204128.
Reviewed by: attilio [1], nwhitehorn [3]
Tested by: sbruno
2011-05-21 17:43:43 +00:00
|
|
|
#define UMA_STARTUP 1
|
|
|
|
#define UMA_STARTUP2 2
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2004-07-29 15:25:40 +00:00
|
|
|
/* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
|
|
|
|
static u_int uma_max_ipers;
|
|
|
|
static u_int uma_max_ipers_ref;
|
|
|
|
|
2003-09-19 23:27:46 +00:00
|
|
|
/*
|
|
|
|
* This is the handle used to schedule events that need to happen
|
|
|
|
* outside of the allocation fast path.
|
|
|
|
*/
|
2002-03-19 09:11:49 +00:00
|
|
|
static struct callout uma_callout;
|
2003-09-19 23:27:46 +00:00
|
|
|
#define UMA_TIMEOUT 20 /* Seconds for callout interval. */
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This structure is passed as the zone ctor arg so that I don't have to create
|
|
|
|
* a special allocation function just for zones.
|
|
|
|
*/
|
|
|
|
struct uma_zctor_args {
|
|
|
|
char *name;
|
2002-05-02 07:36:30 +00:00
|
|
|
size_t size;
|
2002-03-19 09:11:49 +00:00
|
|
|
uma_ctor ctor;
|
|
|
|
uma_dtor dtor;
|
|
|
|
uma_init uminit;
|
|
|
|
uma_fini fini;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
uma_keg_t keg;
|
|
|
|
int align;
|
2005-07-16 02:23:41 +00:00
|
|
|
u_int32_t flags;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
struct uma_kctor_args {
|
|
|
|
uma_zone_t zone;
|
|
|
|
size_t size;
|
|
|
|
uma_init uminit;
|
|
|
|
uma_fini fini;
|
2002-03-19 09:11:49 +00:00
|
|
|
int align;
|
2005-07-16 02:23:41 +00:00
|
|
|
u_int32_t flags;
|
2002-03-19 09:11:49 +00:00
|
|
|
};
|
|
|
|
|
2003-09-19 06:26:45 +00:00
|
|
|
struct uma_bucket_zone {
|
|
|
|
uma_zone_t ubz_zone;
|
|
|
|
char *ubz_name;
|
|
|
|
int ubz_entries;
|
|
|
|
};
|
|
|
|
|
|
|
|
#define BUCKET_MAX 128
|
|
|
|
|
|
|
|
struct uma_bucket_zone bucket_zones[] = {
|
|
|
|
{ NULL, "16 Bucket", 16 },
|
|
|
|
{ NULL, "32 Bucket", 32 },
|
|
|
|
{ NULL, "64 Bucket", 64 },
|
|
|
|
{ NULL, "128 Bucket", 128 },
|
|
|
|
{ NULL, NULL, 0}
|
|
|
|
};
|
|
|
|
|
|
|
|
#define BUCKET_SHIFT 4
|
|
|
|
#define BUCKET_ZONES ((BUCKET_MAX >> BUCKET_SHIFT) + 1)
|
|
|
|
|
2004-11-06 11:24:40 +00:00
|
|
|
/*
|
|
|
|
* bucket_size[] maps requested bucket sizes to zones that allocate a bucket
|
|
|
|
* of approximately the right size.
|
|
|
|
*/
|
|
|
|
static uint8_t bucket_size[BUCKET_ZONES];
|
2003-09-19 06:26:45 +00:00
|
|
|
|
2005-07-15 23:34:39 +00:00
|
|
|
/*
|
|
|
|
* Flags and enumerations to be passed to internal functions.
|
|
|
|
*/
|
2004-08-02 00:18:36 +00:00
|
|
|
enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };
|
|
|
|
|
2005-07-15 23:34:39 +00:00
|
|
|
#define ZFREE_STATFAIL 0x00000001 /* Update zone failure statistic. */
|
2005-07-20 18:47:42 +00:00
|
|
|
#define ZFREE_STATFREE 0x00000002 /* Update zone free statistic. */
|
2005-07-15 23:34:39 +00:00
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
/* Prototypes.. */
|
|
|
|
|
|
|
|
static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
|
|
|
|
static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
|
2003-09-21 07:39:16 +00:00
|
|
|
static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
|
2002-03-19 09:11:49 +00:00
|
|
|
static void page_free(void *, int, u_int8_t);
|
2009-01-25 09:11:24 +00:00
|
|
|
static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
|
2003-09-19 23:27:46 +00:00
|
|
|
static void cache_drain(uma_zone_t);
|
2002-03-19 09:11:49 +00:00
|
|
|
static void bucket_drain(uma_zone_t, uma_bucket_t);
|
2004-02-01 06:15:17 +00:00
|
|
|
static void bucket_cache_drain(uma_zone_t zone);
|
2004-08-02 00:18:36 +00:00
|
|
|
static int keg_ctor(void *, int, void *, int);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
static void keg_dtor(void *, int, void *);
|
2004-08-02 00:18:36 +00:00
|
|
|
static int zone_ctor(void *, int, void *, int);
|
2002-04-08 04:48:58 +00:00
|
|
|
static void zone_dtor(void *, int, void *);
|
2004-08-02 00:18:36 +00:00
|
|
|
static int zero_init(void *, int, int);
|
2009-01-25 09:11:24 +00:00
|
|
|
static void keg_small_init(uma_keg_t keg);
|
|
|
|
static void keg_large_init(uma_keg_t keg);
|
2002-03-19 09:11:49 +00:00
|
|
|
static void zone_foreach(void (*zfunc)(uma_zone_t));
|
|
|
|
static void zone_timeout(uma_zone_t zone);
|
2002-05-13 04:39:28 +00:00
|
|
|
static int hash_alloc(struct uma_hash *);
|
|
|
|
static int hash_expand(struct uma_hash *, struct uma_hash *);
|
|
|
|
static void hash_free(struct uma_hash *hash);
|
2002-03-19 09:11:49 +00:00
|
|
|
static void uma_timeout(void *);
|
|
|
|
static void uma_startup3(void);
|
2009-01-25 09:11:24 +00:00
|
|
|
static void *zone_alloc_item(uma_zone_t, void *, int);
|
|
|
|
static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip,
|
2005-07-15 23:34:39 +00:00
|
|
|
int);
|
2002-04-08 06:20:34 +00:00
|
|
|
static void bucket_enable(void);
|
2003-09-19 06:26:45 +00:00
|
|
|
static void bucket_init(void);
|
|
|
|
static uma_bucket_t bucket_alloc(int, int);
|
|
|
|
static void bucket_free(uma_bucket_t);
|
|
|
|
static void bucket_zone_drain(void);
|
2009-01-25 09:11:24 +00:00
|
|
|
static int zone_alloc_bucket(uma_zone_t zone, int flags);
|
|
|
|
static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
|
|
|
|
static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
|
|
|
|
static void *slab_alloc_item(uma_zone_t zone, uma_slab_t slab);
|
|
|
|
static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
|
2005-07-16 02:23:41 +00:00
|
|
|
uma_fini fini, int align, u_int32_t flags);
|
2009-01-25 09:11:24 +00:00
|
|
|
static inline void zone_relock(uma_zone_t zone, uma_keg_t keg);
|
|
|
|
static inline void keg_relock(uma_keg_t keg, uma_zone_t zone);
|
2002-10-24 07:59:03 +00:00
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
void uma_print_zone(uma_zone_t);
|
|
|
|
void uma_print_stats(void);
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
|
|
|
|
static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
|
|
|
|
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
|
|
|
|
0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
|
|
|
|
|
|
|
|
SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
|
|
|
|
0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
|
|
|
|
|
2002-04-08 06:20:34 +00:00
|
|
|
/*
|
|
|
|
* This routine checks to see whether or not it's safe to enable buckets.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void
|
|
|
|
bucket_enable(void)
|
|
|
|
{
|
2007-05-31 22:52:15 +00:00
|
|
|
if (cnt.v_free_count < cnt.v_free_min)
|
2002-04-08 06:20:34 +00:00
|
|
|
bucketdisable = 1;
|
|
|
|
else
|
|
|
|
bucketdisable = 0;
|
|
|
|
}
|
|
|
|
|
2004-11-06 11:43:30 +00:00
|
|
|
/*
|
|
|
|
* Initialize bucket_zones, the array of zones of buckets of various sizes.
|
|
|
|
*
|
|
|
|
* For each zone, calculate the memory required for each bucket, consisting
|
|
|
|
* of the header and an array of pointers. Initialize bucket_size[] to point
|
|
|
|
* the range of appropriate bucket sizes at the zone.
|
|
|
|
*/
|
2003-09-19 06:26:45 +00:00
|
|
|
static void
|
|
|
|
bucket_init(void)
|
|
|
|
{
|
|
|
|
struct uma_bucket_zone *ubz;
|
|
|
|
int i;
|
|
|
|
int j;
|
|
|
|
|
|
|
|
for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
|
|
|
|
int size;
|
|
|
|
|
|
|
|
ubz = &bucket_zones[j];
|
|
|
|
size = roundup(sizeof(struct uma_bucket), sizeof(void *));
|
|
|
|
size += sizeof(void *) * ubz->ubz_entries;
|
|
|
|
ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
|
2009-01-25 09:11:24 +00:00
|
|
|
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
|
|
|
|
UMA_ZFLAG_INTERNAL | UMA_ZFLAG_BUCKET);
|
2003-09-19 06:26:45 +00:00
|
|
|
for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
|
|
|
|
bucket_size[i >> BUCKET_SHIFT] = j;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-11-06 11:43:30 +00:00
|
|
|
/*
|
|
|
|
* Given a desired number of entries for a bucket, return the zone from which
|
|
|
|
* to allocate the bucket.
|
|
|
|
*/
|
|
|
|
static struct uma_bucket_zone *
|
|
|
|
bucket_zone_lookup(int entries)
|
|
|
|
{
|
|
|
|
int idx;
|
|
|
|
|
|
|
|
idx = howmany(entries, 1 << BUCKET_SHIFT);
|
|
|
|
return (&bucket_zones[bucket_size[idx]]);
|
|
|
|
}
|
|
|
|
|
2003-09-19 06:26:45 +00:00
|
|
|
static uma_bucket_t
|
|
|
|
bucket_alloc(int entries, int bflags)
|
|
|
|
{
|
|
|
|
struct uma_bucket_zone *ubz;
|
|
|
|
uma_bucket_t bucket;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This is to stop us from allocating per cpu buckets while we're
|
2005-10-08 21:03:54 +00:00
|
|
|
* running out of vm.boot_pages. Otherwise, we would exhaust the
|
2003-09-19 06:26:45 +00:00
|
|
|
* boot pages. This also prevents us from allocating buckets in
|
|
|
|
* low memory situations.
|
|
|
|
*/
|
|
|
|
if (bucketdisable)
|
|
|
|
return (NULL);
|
2004-11-06 11:43:30 +00:00
|
|
|
|
|
|
|
ubz = bucket_zone_lookup(entries);
|
2009-01-25 09:11:24 +00:00
|
|
|
bucket = zone_alloc_item(ubz->ubz_zone, NULL, bflags);
|
2003-09-19 06:26:45 +00:00
|
|
|
if (bucket) {
|
|
|
|
#ifdef INVARIANTS
|
|
|
|
bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
|
|
|
|
#endif
|
|
|
|
bucket->ub_cnt = 0;
|
|
|
|
bucket->ub_entries = ubz->ubz_entries;
|
|
|
|
}
|
|
|
|
|
|
|
|
return (bucket);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
bucket_free(uma_bucket_t bucket)
|
|
|
|
{
|
|
|
|
struct uma_bucket_zone *ubz;
|
|
|
|
|
2004-11-06 11:43:30 +00:00
|
|
|
ubz = bucket_zone_lookup(bucket->ub_entries);
|
2009-01-25 09:11:24 +00:00
|
|
|
zone_free_item(ubz->ubz_zone, bucket, NULL, SKIP_NONE,
|
2005-07-20 18:47:42 +00:00
|
|
|
ZFREE_STATFREE);
|
2003-09-19 06:26:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
bucket_zone_drain(void)
|
|
|
|
{
|
|
|
|
struct uma_bucket_zone *ubz;
|
|
|
|
|
|
|
|
for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
|
|
|
|
zone_drain(ubz->ubz_zone);
|
|
|
|
}
|
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
static inline uma_keg_t
|
|
|
|
zone_first_keg(uma_zone_t zone)
|
|
|
|
{
|
|
|
|
|
|
|
|
return (LIST_FIRST(&zone->uz_kegs)->kl_keg);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
|
|
|
|
{
|
|
|
|
uma_klink_t klink;
|
|
|
|
|
|
|
|
LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
|
|
|
|
kegfn(klink->kl_keg);
|
|
|
|
}
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Routine called by timeout which is used to fire off some time interval
|
2003-09-19 23:27:46 +00:00
|
|
|
* based calculations. (stats, hash size, etc.)
|
2002-03-19 09:11:49 +00:00
|
|
|
*
|
|
|
|
* Arguments:
|
|
|
|
* arg Unused
|
2004-01-30 16:26:29 +00:00
|
|
|
*
|
2002-03-19 09:11:49 +00:00
|
|
|
* Returns:
|
|
|
|
* Nothing
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
uma_timeout(void *unused)
|
|
|
|
{
|
2002-04-08 06:20:34 +00:00
|
|
|
bucket_enable();
|
2002-03-19 09:11:49 +00:00
|
|
|
zone_foreach(zone_timeout);
|
|
|
|
|
|
|
|
/* Reschedule this event */
|
2003-09-19 23:27:46 +00:00
|
|
|
callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2003-09-19 23:27:46 +00:00
|
|
|
* Routine to perform timeout driven calculations. This expands the
|
|
|
|
* hashes and does per cpu statistics aggregation.
|
2002-03-19 09:11:49 +00:00
|
|
|
*
|
2009-01-25 09:11:24 +00:00
|
|
|
* Returns nothing.
|
2002-03-19 09:11:49 +00:00
|
|
|
*/
|
|
|
|
static void
|
2009-01-25 09:11:24 +00:00
|
|
|
keg_timeout(uma_keg_t keg)
|
2002-03-19 09:11:49 +00:00
|
|
|
{
|
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
KEG_LOCK(keg);
|
2002-03-19 09:11:49 +00:00
|
|
|
/*
|
2009-01-25 09:11:24 +00:00
|
|
|
* Expand the keg hash table.
|
2004-01-30 16:26:29 +00:00
|
|
|
*
|
2002-03-19 09:11:49 +00:00
|
|
|
* This is done if the number of slabs is larger than the hash size.
|
|
|
|
* What I'm trying to do here is completely reduce collisions. This
|
|
|
|
* may be a little aggressive. Should I allow for two collisions max?
|
|
|
|
*/
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_HASH &&
|
|
|
|
keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
|
2002-09-18 08:26:30 +00:00
|
|
|
struct uma_hash newhash;
|
|
|
|
struct uma_hash oldhash;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
/*
|
2004-01-30 16:26:29 +00:00
|
|
|
* This is so involved because allocating and freeing
|
2009-01-25 09:11:24 +00:00
|
|
|
* while the keg lock is held will lead to deadlock.
|
2002-09-18 08:26:30 +00:00
|
|
|
* I have to do everything in stages and check for
|
|
|
|
* races.
|
|
|
|
*/
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
newhash = keg->uk_hash;
|
2009-01-25 09:11:24 +00:00
|
|
|
KEG_UNLOCK(keg);
|
2002-09-18 08:26:30 +00:00
|
|
|
ret = hash_alloc(&newhash);
|
2009-01-25 09:11:24 +00:00
|
|
|
KEG_LOCK(keg);
|
2002-09-18 08:26:30 +00:00
|
|
|
if (ret) {
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (hash_expand(&keg->uk_hash, &newhash)) {
|
|
|
|
oldhash = keg->uk_hash;
|
|
|
|
keg->uk_hash = newhash;
|
2002-09-18 08:26:30 +00:00
|
|
|
} else
|
|
|
|
oldhash = newhash;
|
2002-05-13 04:39:28 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
KEG_UNLOCK(keg);
|
2002-09-18 08:26:30 +00:00
|
|
|
hash_free(&oldhash);
|
2009-01-25 09:11:24 +00:00
|
|
|
KEG_LOCK(keg);
|
2002-04-14 13:47:10 +00:00
|
|
|
}
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
2009-01-25 09:11:24 +00:00
|
|
|
KEG_UNLOCK(keg);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
zone_timeout(uma_zone_t zone)
|
|
|
|
{
|
|
|
|
|
|
|
|
zone_foreach_keg(zone, &keg_timeout);
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
|
2002-04-14 13:47:10 +00:00
|
|
|
/*
|
|
|
|
* Allocate and zero fill the next sized hash table from the appropriate
|
|
|
|
* backing store.
|
|
|
|
*
|
|
|
|
* Arguments:
|
2002-05-13 04:39:28 +00:00
|
|
|
* hash A new hash structure with the old hash size in uh_hashsize
|
2002-04-14 13:47:10 +00:00
|
|
|
*
|
|
|
|
* Returns:
|
2002-05-13 04:39:28 +00:00
|
|
|
* 1 on sucess and 0 on failure.
|
2002-04-14 13:47:10 +00:00
|
|
|
*/
|
2002-09-28 17:15:38 +00:00
|
|
|
static int
|
2002-05-13 04:39:28 +00:00
|
|
|
hash_alloc(struct uma_hash *hash)
|
2002-04-14 13:47:10 +00:00
|
|
|
{
|
2002-05-13 04:39:28 +00:00
|
|
|
int oldsize;
|
2002-04-14 13:47:10 +00:00
|
|
|
int alloc;
|
|
|
|
|
2002-05-13 04:39:28 +00:00
|
|
|
oldsize = hash->uh_hashsize;
|
|
|
|
|
2002-04-14 13:47:10 +00:00
|
|
|
/* We're just going to go to a power of two greater */
|
2002-05-13 04:39:28 +00:00
|
|
|
if (oldsize) {
|
|
|
|
hash->uh_hashsize = oldsize * 2;
|
|
|
|
alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
|
|
|
|
hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
|
2003-09-19 07:23:50 +00:00
|
|
|
M_UMAHASH, M_NOWAIT);
|
2002-04-14 13:47:10 +00:00
|
|
|
} else {
|
2002-05-13 04:39:28 +00:00
|
|
|
alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
|
2009-01-25 09:11:24 +00:00
|
|
|
hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
|
2003-02-19 05:47:46 +00:00
|
|
|
M_WAITOK);
|
2002-05-13 04:39:28 +00:00
|
|
|
hash->uh_hashsize = UMA_HASH_SIZE_INIT;
|
|
|
|
}
|
|
|
|
if (hash->uh_slab_hash) {
|
|
|
|
bzero(hash->uh_slab_hash, alloc);
|
|
|
|
hash->uh_hashmask = hash->uh_hashsize - 1;
|
|
|
|
return (1);
|
2002-04-14 13:47:10 +00:00
|
|
|
}
|
|
|
|
|
2002-05-13 04:39:28 +00:00
|
|
|
return (0);
|
2002-04-14 13:47:10 +00:00
|
|
|
}
|
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
/*
|
2003-09-19 22:31:45 +00:00
|
|
|
* Expands the hash table for HASH zones. This is done from zone_timeout
|
|
|
|
* to reduce collisions. This must not be done in the regular allocation
|
|
|
|
* path, otherwise, we can recurse on the vm while allocating pages.
|
2002-03-19 09:11:49 +00:00
|
|
|
*
|
|
|
|
* Arguments:
|
2004-01-30 16:26:29 +00:00
|
|
|
* oldhash The hash you want to expand
|
2002-05-13 04:39:28 +00:00
|
|
|
* newhash The hash structure for the new table
|
2002-03-19 09:11:49 +00:00
|
|
|
*
|
|
|
|
* Returns:
|
2004-01-30 16:26:29 +00:00
|
|
|
* Nothing
|
2002-03-19 09:11:49 +00:00
|
|
|
*
|
|
|
|
* Discussion:
|
|
|
|
*/
|
2002-05-13 04:39:28 +00:00
|
|
|
static int
|
|
|
|
hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
|
2002-03-19 09:11:49 +00:00
|
|
|
{
|
|
|
|
uma_slab_t slab;
|
|
|
|
int hval;
|
|
|
|
int i;
|
|
|
|
|
2002-05-13 04:39:28 +00:00
|
|
|
if (!newhash->uh_slab_hash)
|
|
|
|
return (0);
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2002-05-13 04:39:28 +00:00
|
|
|
if (oldhash->uh_hashsize >= newhash->uh_hashsize)
|
|
|
|
return (0);
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* I need to investigate hash algorithms for resizing without a
|
|
|
|
* full rehash.
|
|
|
|
*/
|
|
|
|
|
2002-05-13 04:39:28 +00:00
|
|
|
for (i = 0; i < oldhash->uh_hashsize; i++)
|
|
|
|
while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
|
|
|
|
slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
|
|
|
|
SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
|
|
|
|
hval = UMA_HASH(newhash, slab->us_data);
|
|
|
|
SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
|
|
|
|
slab, us_hlink);
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
|
2002-05-13 04:39:28 +00:00
|
|
|
return (1);
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
|
2002-04-14 13:47:10 +00:00
|
|
|
/*
|
|
|
|
* Free the hash bucket to the appropriate backing store.
|
|
|
|
*
|
|
|
|
* Arguments:
|
|
|
|
* slab_hash The hash bucket we're freeing
|
|
|
|
* hashsize The number of entries in that hash bucket
|
|
|
|
*
|
|
|
|
* Returns:
|
|
|
|
* Nothing
|
|
|
|
*/
|
2002-04-08 04:48:58 +00:00
|
|
|
static void
|
2002-05-13 04:39:28 +00:00
|
|
|
hash_free(struct uma_hash *hash)
|
2002-04-08 04:48:58 +00:00
|
|
|
{
|
2002-05-13 04:39:28 +00:00
|
|
|
if (hash->uh_slab_hash == NULL)
|
|
|
|
return;
|
|
|
|
if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
|
2009-01-25 09:11:24 +00:00
|
|
|
zone_free_item(hashzone,
|
2005-07-20 18:47:42 +00:00
|
|
|
hash->uh_slab_hash, NULL, SKIP_NONE, ZFREE_STATFREE);
|
2002-04-08 04:48:58 +00:00
|
|
|
else
|
2003-09-19 07:23:50 +00:00
|
|
|
free(hash->uh_slab_hash, M_UMAHASH);
|
2002-04-08 04:48:58 +00:00
|
|
|
}
|
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
/*
|
|
|
|
* Frees all outstanding items in a bucket
|
|
|
|
*
|
|
|
|
* Arguments:
|
|
|
|
* zone The zone to free to, must be unlocked.
|
|
|
|
* bucket The free/alloc bucket with items, cpu queue must be locked.
|
|
|
|
*
|
|
|
|
* Returns:
|
|
|
|
* Nothing
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void
|
|
|
|
bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
|
|
|
|
{
|
|
|
|
void *item;
|
|
|
|
|
|
|
|
if (bucket == NULL)
|
|
|
|
return;
|
|
|
|
|
2003-09-19 06:26:45 +00:00
|
|
|
while (bucket->ub_cnt > 0) {
|
|
|
|
bucket->ub_cnt--;
|
|
|
|
item = bucket->ub_bucket[bucket->ub_cnt];
|
2002-03-19 09:11:49 +00:00
|
|
|
#ifdef INVARIANTS
|
2003-09-19 06:26:45 +00:00
|
|
|
bucket->ub_bucket[bucket->ub_cnt] = NULL;
|
2002-03-19 09:11:49 +00:00
|
|
|
KASSERT(item != NULL,
|
|
|
|
("bucket_drain: botched ptr, item is NULL"));
|
|
|
|
#endif
|
2009-01-25 09:11:24 +00:00
|
|
|
zone_free_item(zone, item, NULL, SKIP_DTOR, 0);
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Drains the per cpu caches for a zone.
|
|
|
|
*
|
Modify UMA to use critical sections to protect per-CPU caches, rather than
mutexes, which offers lower overhead on both UP and SMP. When allocating
from or freeing to the per-cpu cache, without INVARIANTS enabled, we now
no longer perform any mutex operations, which offers a 1%-3% performance
improvement in a variety of micro-benchmarks. We rely on critical
sections to prevent (a) preemption resulting in reentrant access to UMA on
a single CPU, and (b) migration of the thread during access. In the event
we need to go back to the zone for a new bucket, we release the critical
section to acquire the global zone mutex, and must re-acquire the critical
section and re-evaluate which cache we are accessing in case migration has
occured, or circumstances have changed in the current cache.
Per-CPU cache statistics are now gathered lock-free by the sysctl, which
can result in small races in statistics reporting for caches.
Reviewed by: bmilekic, jeff (somewhat)
Tested by: rwatson, kris, gnn, scottl, mike at sentex dot net, others
2005-04-29 18:56:36 +00:00
|
|
|
* NOTE: This may only be called while the zone is being turn down, and not
|
|
|
|
* during normal operation. This is necessary in order that we do not have
|
|
|
|
* to migrate CPUs to drain the per-CPU caches.
|
|
|
|
*
|
2002-03-19 09:11:49 +00:00
|
|
|
* Arguments:
|
2003-07-30 18:55:15 +00:00
|
|
|
* zone The zone to drain, must be unlocked.
|
2002-03-19 09:11:49 +00:00
|
|
|
*
|
|
|
|
* Returns:
|
|
|
|
* Nothing
|
|
|
|
*/
|
|
|
|
static void
|
2003-09-19 23:27:46 +00:00
|
|
|
cache_drain(uma_zone_t zone)
|
2002-03-19 09:11:49 +00:00
|
|
|
{
|
|
|
|
uma_cache_t cache;
|
|
|
|
int cpu;
|
|
|
|
|
|
|
|
/*
|
Modify UMA to use critical sections to protect per-CPU caches, rather than
mutexes, which offers lower overhead on both UP and SMP. When allocating
from or freeing to the per-cpu cache, without INVARIANTS enabled, we now
no longer perform any mutex operations, which offers a 1%-3% performance
improvement in a variety of micro-benchmarks. We rely on critical
sections to prevent (a) preemption resulting in reentrant access to UMA on
a single CPU, and (b) migration of the thread during access. In the event
we need to go back to the zone for a new bucket, we release the critical
section to acquire the global zone mutex, and must re-acquire the critical
section and re-evaluate which cache we are accessing in case migration has
occured, or circumstances have changed in the current cache.
Per-CPU cache statistics are now gathered lock-free by the sysctl, which
can result in small races in statistics reporting for caches.
Reviewed by: bmilekic, jeff (somewhat)
Tested by: rwatson, kris, gnn, scottl, mike at sentex dot net, others
2005-04-29 18:56:36 +00:00
|
|
|
* XXX: It is safe to not lock the per-CPU caches, because we're
|
|
|
|
* tearing down the zone anyway. I.e., there will be no further use
|
|
|
|
* of the caches at this point.
|
|
|
|
*
|
|
|
|
* XXX: It would good to be able to assert that the zone is being
|
|
|
|
* torn down to prevent improper use of cache_drain().
|
|
|
|
*
|
|
|
|
* XXX: We lock the zone before passing into bucket_cache_drain() as
|
|
|
|
* it is used elsewhere. Should the tear-down path be made special
|
|
|
|
* there in some form?
|
2002-03-19 09:11:49 +00:00
|
|
|
*/
|
2010-06-11 18:46:34 +00:00
|
|
|
CPU_FOREACH(cpu) {
|
2002-03-19 09:11:49 +00:00
|
|
|
cache = &zone->uz_cpu[cpu];
|
|
|
|
bucket_drain(zone, cache->uc_allocbucket);
|
|
|
|
bucket_drain(zone, cache->uc_freebucket);
|
2003-09-19 23:27:46 +00:00
|
|
|
if (cache->uc_allocbucket != NULL)
|
|
|
|
bucket_free(cache->uc_allocbucket);
|
|
|
|
if (cache->uc_freebucket != NULL)
|
|
|
|
bucket_free(cache->uc_freebucket);
|
|
|
|
cache->uc_allocbucket = cache->uc_freebucket = NULL;
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
2004-02-01 06:15:17 +00:00
|
|
|
ZONE_LOCK(zone);
|
|
|
|
bucket_cache_drain(zone);
|
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Drain the cached buckets from a zone. Expects a locked zone on entry.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
bucket_cache_drain(uma_zone_t zone)
|
|
|
|
{
|
|
|
|
uma_bucket_t bucket;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Drain the bucket queues and free the buckets, we just keep two per
|
|
|
|
* cpu (alloc/free).
|
|
|
|
*/
|
|
|
|
while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
|
|
|
|
LIST_REMOVE(bucket, ub_link);
|
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
bucket_drain(zone, bucket);
|
2003-09-19 06:26:45 +00:00
|
|
|
bucket_free(bucket);
|
2002-03-19 09:11:49 +00:00
|
|
|
ZONE_LOCK(zone);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now we do the free queue.. */
|
|
|
|
while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
|
|
|
|
LIST_REMOVE(bucket, ub_link);
|
2003-09-19 06:26:45 +00:00
|
|
|
bucket_free(bucket);
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2009-01-25 09:11:24 +00:00
|
|
|
* Frees pages from a keg back to the system. This is done on demand from
|
2002-03-19 09:11:49 +00:00
|
|
|
* the pageout daemon.
|
|
|
|
*
|
2009-01-25 09:11:24 +00:00
|
|
|
* Returns nothing.
|
2002-03-19 09:11:49 +00:00
|
|
|
*/
|
2009-01-25 09:11:24 +00:00
|
|
|
static void
|
|
|
|
keg_drain(uma_keg_t keg)
|
2002-03-19 09:11:49 +00:00
|
|
|
{
|
2005-01-10 20:30:04 +00:00
|
|
|
struct slabhead freeslabs = { 0 };
|
2002-03-19 09:11:49 +00:00
|
|
|
uma_slab_t slab;
|
|
|
|
uma_slab_t n;
|
|
|
|
u_int8_t flags;
|
|
|
|
u_int8_t *mem;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/*
|
2009-01-25 09:11:24 +00:00
|
|
|
* We don't want to take pages from statically allocated kegs at this
|
2002-03-19 09:11:49 +00:00
|
|
|
* time
|
|
|
|
*/
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
|
2002-03-19 09:11:49 +00:00
|
|
|
return;
|
|
|
|
|
|
|
|
#ifdef UMA_DEBUG
|
2009-01-25 09:11:24 +00:00
|
|
|
printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
|
2002-03-19 09:11:49 +00:00
|
|
|
#endif
|
2009-01-25 09:11:24 +00:00
|
|
|
KEG_LOCK(keg);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (keg->uk_free == 0)
|
2002-03-19 09:11:49 +00:00
|
|
|
goto finished;
|
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
slab = LIST_FIRST(&keg->uk_free_slab);
|
2003-09-19 23:27:46 +00:00
|
|
|
while (slab) {
|
2002-03-19 09:11:49 +00:00
|
|
|
n = LIST_NEXT(slab, us_link);
|
|
|
|
|
|
|
|
/* We have no where to free these to */
|
|
|
|
if (slab->us_flags & UMA_SLAB_BOOT) {
|
|
|
|
slab = n;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
LIST_REMOVE(slab, us_link);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
keg->uk_pages -= keg->uk_ppera;
|
|
|
|
keg->uk_free -= keg->uk_ipers;
|
2002-05-13 05:08:18 +00:00
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_HASH)
|
|
|
|
UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
|
2002-05-13 05:08:18 +00:00
|
|
|
|
|
|
|
SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
|
|
|
|
|
|
|
|
slab = n;
|
|
|
|
}
|
|
|
|
finished:
|
2009-01-25 09:11:24 +00:00
|
|
|
KEG_UNLOCK(keg);
|
2002-05-13 05:08:18 +00:00
|
|
|
|
|
|
|
while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
|
|
|
|
SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (keg->uk_fini)
|
|
|
|
for (i = 0; i < keg->uk_ipers; i++)
|
|
|
|
keg->uk_fini(
|
|
|
|
slab->us_data + (keg->uk_rsize * i),
|
|
|
|
keg->uk_size);
|
2002-03-19 09:11:49 +00:00
|
|
|
flags = slab->us_flags;
|
|
|
|
mem = slab->us_data;
|
2002-09-18 08:26:30 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_VTOSLAB) {
|
2005-02-24 06:13:01 +00:00
|
|
|
vm_object_t obj;
|
|
|
|
|
|
|
|
if (flags & UMA_SLAB_KMEM)
|
|
|
|
obj = kmem_object;
|
2008-04-04 18:41:12 +00:00
|
|
|
else if (flags & UMA_SLAB_KERNEL)
|
|
|
|
obj = kernel_object;
|
2005-02-24 06:13:01 +00:00
|
|
|
else
|
|
|
|
obj = NULL;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
for (i = 0; i < keg->uk_ppera; i++)
|
2002-09-18 08:26:30 +00:00
|
|
|
vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
|
2005-02-24 06:13:01 +00:00
|
|
|
obj);
|
2002-11-01 01:01:27 +00:00
|
|
|
}
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_OFFPAGE)
|
2009-01-25 09:11:24 +00:00
|
|
|
zone_free_item(keg->uk_slabzone, slab, NULL,
|
2005-07-20 18:47:42 +00:00
|
|
|
SKIP_NONE, ZFREE_STATFREE);
|
2002-03-19 09:11:49 +00:00
|
|
|
#ifdef UMA_DEBUG
|
|
|
|
printf("%s: Returning %d bytes.\n",
|
2009-01-25 09:11:24 +00:00
|
|
|
keg->uk_name, UMA_SLAB_SIZE * keg->uk_ppera);
|
2002-03-19 09:11:49 +00:00
|
|
|
#endif
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
static void
|
|
|
|
zone_drain_wait(uma_zone_t zone, int waitok)
|
|
|
|
{
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set draining to interlock with zone_dtor() so we can release our
|
|
|
|
* locks as we go. Only dtor() should do a WAITOK call since it
|
|
|
|
* is the only call that knows the structure will still be available
|
|
|
|
* when it wakes up.
|
|
|
|
*/
|
|
|
|
ZONE_LOCK(zone);
|
|
|
|
while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
|
|
|
|
if (waitok == M_NOWAIT)
|
|
|
|
goto out;
|
|
|
|
mtx_unlock(&uma_mtx);
|
|
|
|
msleep(zone, zone->uz_lock, PVM, "zonedrain", 1);
|
|
|
|
mtx_lock(&uma_mtx);
|
|
|
|
}
|
|
|
|
zone->uz_flags |= UMA_ZFLAG_DRAINING;
|
|
|
|
bucket_cache_drain(zone);
|
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
/*
|
|
|
|
* The DRAINING flag protects us from being freed while
|
|
|
|
* we're running. Normally the uma_mtx would protect us but we
|
|
|
|
* must be able to release and acquire the right lock for each keg.
|
|
|
|
*/
|
|
|
|
zone_foreach_keg(zone, &keg_drain);
|
|
|
|
ZONE_LOCK(zone);
|
|
|
|
zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
|
|
|
|
wakeup(zone);
|
|
|
|
out:
|
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
zone_drain(uma_zone_t zone)
|
|
|
|
{
|
|
|
|
|
|
|
|
zone_drain_wait(zone, M_NOWAIT);
|
|
|
|
}
|
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
/*
|
2009-01-25 09:11:24 +00:00
|
|
|
* Allocate a new slab for a keg. This does not insert the slab onto a list.
|
2002-03-19 09:11:49 +00:00
|
|
|
*
|
|
|
|
* Arguments:
|
|
|
|
* wait Shall we wait?
|
|
|
|
*
|
|
|
|
* Returns:
|
|
|
|
* The slab that was allocated or NULL if there is no memory and the
|
|
|
|
* caller specified M_NOWAIT.
|
|
|
|
*/
|
2004-01-30 16:26:29 +00:00
|
|
|
static uma_slab_t
|
2009-01-25 09:11:24 +00:00
|
|
|
keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
|
2002-03-19 09:11:49 +00:00
|
|
|
{
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
uma_slabrefcnt_t slabref;
|
2009-01-25 09:11:24 +00:00
|
|
|
uma_alloc allocf;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
uma_slab_t slab;
|
2002-03-19 09:11:49 +00:00
|
|
|
u_int8_t *mem;
|
|
|
|
u_int8_t flags;
|
|
|
|
int i;
|
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
mtx_assert(&keg->uk_lock, MA_OWNED);
|
2002-04-08 02:42:55 +00:00
|
|
|
slab = NULL;
|
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
#ifdef UMA_DEBUG
|
2009-01-25 09:11:24 +00:00
|
|
|
printf("slab_zalloc: Allocating a new slab for %s\n", keg->uk_name);
|
2002-03-19 09:11:49 +00:00
|
|
|
#endif
|
2009-01-25 09:11:24 +00:00
|
|
|
allocf = keg->uk_allocf;
|
|
|
|
KEG_UNLOCK(keg);
|
2002-04-08 02:42:55 +00:00
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
|
2009-01-25 09:11:24 +00:00
|
|
|
slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
|
2002-04-08 02:42:55 +00:00
|
|
|
if (slab == NULL) {
|
2009-01-25 09:11:24 +00:00
|
|
|
KEG_LOCK(keg);
|
2002-04-08 02:42:55 +00:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2002-06-19 20:49:44 +00:00
|
|
|
/*
|
|
|
|
* This reproduces the old vm_zone behavior of zero filling pages the
|
|
|
|
* first time they are added to a zone.
|
|
|
|
*
|
|
|
|
* Malloced items are zeroed in uma_zalloc.
|
|
|
|
*/
|
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
|
2002-06-19 20:49:44 +00:00
|
|
|
wait |= M_ZERO;
|
|
|
|
else
|
|
|
|
wait &= ~M_ZERO;
|
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
/* zone is passed for legacy reasons. */
|
|
|
|
mem = allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait);
|
2003-09-19 08:53:33 +00:00
|
|
|
if (mem == NULL) {
|
2004-08-02 00:18:36 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_OFFPAGE)
|
2009-01-25 09:11:24 +00:00
|
|
|
zone_free_item(keg->uk_slabzone, slab, NULL,
|
2005-07-20 18:47:42 +00:00
|
|
|
SKIP_NONE, ZFREE_STATFREE);
|
2009-01-25 09:11:24 +00:00
|
|
|
KEG_LOCK(keg);
|
2003-09-19 08:53:33 +00:00
|
|
|
return (NULL);
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
|
2002-06-25 21:04:50 +00:00
|
|
|
/* Point the slab into the allocated memory */
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
|
|
|
|
slab = (uma_slab_t )(mem + keg->uk_pgoff);
|
2002-06-25 21:04:50 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_VTOSLAB)
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
for (i = 0; i < keg->uk_ppera; i++)
|
2002-09-18 08:26:30 +00:00
|
|
|
vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
|
2002-03-19 09:11:49 +00:00
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
slab->us_keg = keg;
|
2002-03-19 09:11:49 +00:00
|
|
|
slab->us_data = mem;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
slab->us_freecount = keg->uk_ipers;
|
2002-03-19 09:11:49 +00:00
|
|
|
slab->us_firstfree = 0;
|
|
|
|
slab->us_flags = flags;
|
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_REFCNT) {
|
|
|
|
slabref = (uma_slabrefcnt_t)slab;
|
Fix critical stability problems that can cause UMA mbuf cluster
state management corruption, mbuf leaks, general mbuf corruption,
and at least on i386 a first level splash damage radius that
encompasses up to about half a megabyte of the memory after
an mbuf cluster's allocation slab. In short, this has caused
instability nightmares anywhere the right kind of network traffic
is present.
When the polymorphic refcount slabs were added to UMA, the new types
were not used pervasively. In particular, the slab management
structure was turned into one for refcounts, and one for non-refcounts
(supposed to be mostly like the old slab management structure),
but the latter was almost always used through out. In general, every
access to zones with UMA_ZONE_REFCNT turned on corrupted the
"next free" slab offset offset and the refcount with each other and
with other allocations (on i386, 2 mbuf clusters per 4096 byte slab).
Fix things so that the right type is used to access refcounted zones
where it was not before. There are additional errors in gross
overestimation of padding, it seems, that would cause a large kegs
(nee zones) to be allocated when small ones would do. Unless I have
analyzed this incorrectly, it is not directly harmful.
2004-10-08 20:19:29 +00:00
|
|
|
for (i = 0; i < keg->uk_ipers; i++) {
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
slabref->us_freelist[i].us_refcnt = 0;
|
Fix critical stability problems that can cause UMA mbuf cluster
state management corruption, mbuf leaks, general mbuf corruption,
and at least on i386 a first level splash damage radius that
encompasses up to about half a megabyte of the memory after
an mbuf cluster's allocation slab. In short, this has caused
instability nightmares anywhere the right kind of network traffic
is present.
When the polymorphic refcount slabs were added to UMA, the new types
were not used pervasively. In particular, the slab management
structure was turned into one for refcounts, and one for non-refcounts
(supposed to be mostly like the old slab management structure),
but the latter was almost always used through out. In general, every
access to zones with UMA_ZONE_REFCNT turned on corrupted the
"next free" slab offset offset and the refcount with each other and
with other allocations (on i386, 2 mbuf clusters per 4096 byte slab).
Fix things so that the right type is used to access refcounted zones
where it was not before. There are additional errors in gross
overestimation of padding, it seems, that would cause a large kegs
(nee zones) to be allocated when small ones would do. Unless I have
analyzed this incorrectly, it is not directly harmful.
2004-10-08 20:19:29 +00:00
|
|
|
slabref->us_freelist[i].us_item = i+1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
for (i = 0; i < keg->uk_ipers; i++)
|
|
|
|
slab->us_freelist[i].us_item = i+1;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
}
|
|
|
|
|
2004-08-02 00:18:36 +00:00
|
|
|
if (keg->uk_init != NULL) {
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
for (i = 0; i < keg->uk_ipers; i++)
|
2004-08-02 00:18:36 +00:00
|
|
|
if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
|
|
|
|
keg->uk_size, wait) != 0)
|
|
|
|
break;
|
|
|
|
if (i != keg->uk_ipers) {
|
|
|
|
if (keg->uk_fini != NULL) {
|
|
|
|
for (i--; i > -1; i--)
|
|
|
|
keg->uk_fini(slab->us_data +
|
|
|
|
(keg->uk_rsize * i),
|
|
|
|
keg->uk_size);
|
|
|
|
}
|
2009-01-25 09:11:24 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_VTOSLAB) {
|
2005-02-24 06:13:01 +00:00
|
|
|
vm_object_t obj;
|
|
|
|
|
|
|
|
if (flags & UMA_SLAB_KMEM)
|
|
|
|
obj = kmem_object;
|
2008-04-04 18:41:12 +00:00
|
|
|
else if (flags & UMA_SLAB_KERNEL)
|
|
|
|
obj = kernel_object;
|
2005-02-24 06:13:01 +00:00
|
|
|
else
|
|
|
|
obj = NULL;
|
2004-08-02 00:18:36 +00:00
|
|
|
for (i = 0; i < keg->uk_ppera; i++)
|
|
|
|
vsetobj((vm_offset_t)mem +
|
2005-02-24 06:13:01 +00:00
|
|
|
(i * PAGE_SIZE), obj);
|
|
|
|
}
|
2004-08-02 00:18:36 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_OFFPAGE)
|
2009-01-25 09:11:24 +00:00
|
|
|
zone_free_item(keg->uk_slabzone, slab,
|
2005-07-20 18:47:42 +00:00
|
|
|
NULL, SKIP_NONE, ZFREE_STATFREE);
|
2004-08-02 00:18:36 +00:00
|
|
|
keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
|
|
|
|
flags);
|
2009-01-25 09:11:24 +00:00
|
|
|
KEG_LOCK(keg);
|
2004-08-02 00:18:36 +00:00
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
}
|
2009-01-25 09:11:24 +00:00
|
|
|
KEG_LOCK(keg);
|
2002-06-25 21:04:50 +00:00
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_HASH)
|
|
|
|
UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
|
2002-03-19 09:11:49 +00:00
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
keg->uk_pages += keg->uk_ppera;
|
|
|
|
keg->uk_free += keg->uk_ipers;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
return (slab);
|
|
|
|
}
|
|
|
|
|
2003-09-21 07:39:16 +00:00
|
|
|
/*
|
|
|
|
* This function is intended to be used early on in place of page_alloc() so
|
|
|
|
* that we may use the boot time page cache to satisfy allocations before
|
|
|
|
* the VM is ready.
|
|
|
|
*/
|
|
|
|
static void *
|
|
|
|
startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
|
|
|
|
{
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
uma_keg_t keg;
|
2005-09-09 06:03:08 +00:00
|
|
|
uma_slab_t tmps;
|
2010-11-04 15:33:50 +00:00
|
|
|
int pages, check_pages;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
keg = zone_first_keg(zone);
|
2010-11-04 15:33:50 +00:00
|
|
|
pages = howmany(bytes, PAGE_SIZE);
|
|
|
|
check_pages = pages - 1;
|
|
|
|
KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
|
2003-09-21 07:39:16 +00:00
|
|
|
/*
|
|
|
|
* Check our small startup cache to see if it has pages remaining.
|
|
|
|
*/
|
2005-09-09 06:03:08 +00:00
|
|
|
mtx_lock(&uma_boot_pages_mtx);
|
2010-11-04 15:33:50 +00:00
|
|
|
|
|
|
|
/* First check if we have enough room. */
|
|
|
|
tmps = LIST_FIRST(&uma_boot_pages);
|
|
|
|
while (tmps != NULL && check_pages-- > 0)
|
|
|
|
tmps = LIST_NEXT(tmps, us_link);
|
|
|
|
if (tmps != NULL) {
|
|
|
|
/*
|
|
|
|
* It's ok to lose tmps references. The last one will
|
|
|
|
* have tmps->us_data pointing to the start address of
|
|
|
|
* "pages" contiguous pages of memory.
|
|
|
|
*/
|
|
|
|
while (pages-- > 0) {
|
|
|
|
tmps = LIST_FIRST(&uma_boot_pages);
|
|
|
|
LIST_REMOVE(tmps, us_link);
|
|
|
|
}
|
2005-09-09 06:03:08 +00:00
|
|
|
mtx_unlock(&uma_boot_pages_mtx);
|
2003-09-21 07:39:16 +00:00
|
|
|
*pflag = tmps->us_flags;
|
|
|
|
return (tmps->us_data);
|
|
|
|
}
|
2005-09-09 06:03:08 +00:00
|
|
|
mtx_unlock(&uma_boot_pages_mtx);
|
1. Prior to r214782, UMA did not support multipage allocations before
uma_startup2() was called. Thus, setting the variable "booted" to true in
uma_startup() was ok on machines with UMA_MD_SMALL_ALLOC defined, because
any allocations made after uma_startup() but before uma_startup2() could be
satisfied by uma_small_alloc(). Now, however, some multipage allocations
are necessary before uma_startup2() just to allocate zone structures on
machines with a large number of processors. Thus, a Boolean can no longer
effectively describe the state of the UMA allocator. Instead, make "booted"
have three values to describe how far initialization has progressed. This
allows multipage allocations to continue using startup_alloc() until
uma_startup2(), but single-page allocations may begin using
uma_small_alloc() after uma_startup().
2. With the aforementioned change, only a modest increase in boot pages is
necessary to boot UMA on a large number of processors.
3. Retire UMA_MD_SMALL_ALLOC_NEEDS_VM. It has only been used between
r182028 and r204128.
Reviewed by: attilio [1], nwhitehorn [3]
Tested by: sbruno
2011-05-21 17:43:43 +00:00
|
|
|
if (booted < UMA_STARTUP2)
|
2005-10-08 21:03:54 +00:00
|
|
|
panic("UMA: Increase vm.boot_pages");
|
2003-09-21 07:39:16 +00:00
|
|
|
/*
|
|
|
|
* Now that we've booted reset these users to their real allocator.
|
|
|
|
*/
|
|
|
|
#ifdef UMA_MD_SMALL_ALLOC
|
2010-11-04 15:33:50 +00:00
|
|
|
keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
|
2003-09-21 07:39:16 +00:00
|
|
|
#else
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
keg->uk_allocf = page_alloc;
|
2003-09-21 07:39:16 +00:00
|
|
|
#endif
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
return keg->uk_allocf(zone, bytes, pflag, wait);
|
2003-09-21 07:39:16 +00:00
|
|
|
}
|
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
/*
|
|
|
|
* Allocates a number of pages from the system
|
|
|
|
*
|
|
|
|
* Arguments:
|
|
|
|
* bytes The number of bytes requested
|
|
|
|
* wait Shall we wait?
|
|
|
|
*
|
|
|
|
* Returns:
|
2004-01-30 16:26:29 +00:00
|
|
|
* A pointer to the alloced memory or possibly
|
2002-03-19 09:11:49 +00:00
|
|
|
* NULL if M_NOWAIT is set.
|
|
|
|
*/
|
|
|
|
static void *
|
|
|
|
page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
|
|
|
|
{
|
|
|
|
void *p; /* Returned page */
|
|
|
|
|
2002-06-19 20:49:44 +00:00
|
|
|
*pflag = UMA_SLAB_KMEM;
|
|
|
|
p = (void *) kmem_malloc(kmem_map, bytes, wait);
|
2004-01-30 16:26:29 +00:00
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
return (p);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Allocates a number of pages from within an object
|
|
|
|
*
|
|
|
|
* Arguments:
|
|
|
|
* bytes The number of bytes requested
|
|
|
|
* wait Shall we wait?
|
|
|
|
*
|
|
|
|
* Returns:
|
2004-01-30 16:26:29 +00:00
|
|
|
* A pointer to the alloced memory or possibly
|
2002-03-19 09:11:49 +00:00
|
|
|
* NULL if M_NOWAIT is set.
|
|
|
|
*/
|
|
|
|
static void *
|
|
|
|
obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
|
|
|
|
{
|
2003-08-03 06:08:48 +00:00
|
|
|
vm_object_t object;
|
|
|
|
vm_offset_t retkva, zkva;
|
2002-03-19 09:11:49 +00:00
|
|
|
vm_page_t p;
|
2003-08-03 06:08:48 +00:00
|
|
|
int pages, startpages;
|
2009-01-25 09:11:24 +00:00
|
|
|
uma_keg_t keg;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
keg = zone_first_keg(zone);
|
|
|
|
object = keg->uk_obj;
|
2002-08-21 23:39:52 +00:00
|
|
|
retkva = 0;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2004-01-30 16:26:29 +00:00
|
|
|
/*
|
2003-09-19 22:31:45 +00:00
|
|
|
* This looks a little weird since we're getting one page at a time.
|
2002-03-19 09:11:49 +00:00
|
|
|
*/
|
2003-08-03 06:08:48 +00:00
|
|
|
VM_OBJECT_LOCK(object);
|
|
|
|
p = TAILQ_LAST(&object->memq, pglist);
|
|
|
|
pages = p != NULL ? p->pindex + 1 : 0;
|
|
|
|
startpages = pages;
|
2009-01-25 09:11:24 +00:00
|
|
|
zkva = keg->uk_kva + pages * PAGE_SIZE;
|
2003-08-03 06:08:48 +00:00
|
|
|
for (; bytes > 0; bytes -= PAGE_SIZE) {
|
|
|
|
p = vm_page_alloc(object, pages,
|
|
|
|
VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
|
|
|
|
if (p == NULL) {
|
|
|
|
if (pages != startpages)
|
|
|
|
pmap_qremove(retkva, pages - startpages);
|
|
|
|
while (pages != startpages) {
|
|
|
|
pages--;
|
|
|
|
p = TAILQ_LAST(&object->memq, pglist);
|
|
|
|
vm_page_unwire(p, 0);
|
|
|
|
vm_page_free(p);
|
|
|
|
}
|
|
|
|
retkva = 0;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
pmap_qenter(zkva, &p, 1);
|
2002-08-21 23:39:52 +00:00
|
|
|
if (retkva == 0)
|
2002-03-19 09:11:49 +00:00
|
|
|
retkva = zkva;
|
2003-08-03 06:08:48 +00:00
|
|
|
zkva += PAGE_SIZE;
|
2002-03-19 09:11:49 +00:00
|
|
|
pages += 1;
|
|
|
|
}
|
2003-08-03 06:08:48 +00:00
|
|
|
done:
|
|
|
|
VM_OBJECT_UNLOCK(object);
|
2002-03-19 09:11:49 +00:00
|
|
|
*flags = UMA_SLAB_PRIV;
|
|
|
|
|
|
|
|
return ((void *)retkva);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Frees a number of pages to the system
|
2004-01-30 16:26:29 +00:00
|
|
|
*
|
2002-03-19 09:11:49 +00:00
|
|
|
* Arguments:
|
|
|
|
* mem A pointer to the memory to be freed
|
|
|
|
* size The size of the memory being freed
|
|
|
|
* flags The original p->us_flags field
|
|
|
|
*
|
|
|
|
* Returns:
|
|
|
|
* Nothing
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
page_free(void *mem, int size, u_int8_t flags)
|
|
|
|
{
|
|
|
|
vm_map_t map;
|
2002-06-19 20:49:44 +00:00
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
if (flags & UMA_SLAB_KMEM)
|
|
|
|
map = kmem_map;
|
2009-06-18 07:27:11 +00:00
|
|
|
else if (flags & UMA_SLAB_KERNEL)
|
|
|
|
map = kernel_map;
|
2002-03-19 09:11:49 +00:00
|
|
|
else
|
2009-06-18 07:27:11 +00:00
|
|
|
panic("UMA: page_free used with invalid flags %d", flags);
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
kmem_free(map, (vm_offset_t)mem, size);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Zero fill initializer
|
|
|
|
*
|
|
|
|
* Arguments/Returns follow uma_init specifications
|
|
|
|
*/
|
2004-08-02 00:18:36 +00:00
|
|
|
static int
|
|
|
|
zero_init(void *mem, int size, int flags)
|
2002-03-19 09:11:49 +00:00
|
|
|
{
|
|
|
|
bzero(mem, size);
|
2004-08-02 00:18:36 +00:00
|
|
|
return (0);
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2009-01-25 09:11:24 +00:00
|
|
|
* Finish creating a small uma keg. This calculates ipers, and the keg size.
|
2002-03-19 09:11:49 +00:00
|
|
|
*
|
|
|
|
* Arguments
|
2009-01-25 09:11:24 +00:00
|
|
|
* keg The zone we should initialize
|
2002-03-19 09:11:49 +00:00
|
|
|
*
|
|
|
|
* Returns
|
|
|
|
* Nothing
|
|
|
|
*/
|
|
|
|
static void
|
2009-01-25 09:11:24 +00:00
|
|
|
keg_small_init(uma_keg_t keg)
|
2002-03-19 09:11:49 +00:00
|
|
|
{
|
2004-07-29 15:25:40 +00:00
|
|
|
u_int rsize;
|
|
|
|
u_int memused;
|
|
|
|
u_int wastedspace;
|
|
|
|
u_int shsize;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
KASSERT(keg != NULL, ("Keg is null in keg_small_init"));
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
rsize = keg->uk_size;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
if (rsize < UMA_SMALLEST_UNIT)
|
|
|
|
rsize = UMA_SMALLEST_UNIT;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (rsize & keg->uk_align)
|
|
|
|
rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
|
2002-03-19 09:11:49 +00:00
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
keg->uk_rsize = rsize;
|
|
|
|
keg->uk_ppera = 1;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2004-07-29 15:25:40 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_REFCNT) {
|
|
|
|
rsize += UMA_FRITMREF_SZ; /* linkage & refcnt */
|
|
|
|
shsize = sizeof(struct uma_slab_refcnt);
|
|
|
|
} else {
|
|
|
|
rsize += UMA_FRITM_SZ; /* Account for linkage */
|
|
|
|
shsize = sizeof(struct uma_slab);
|
|
|
|
}
|
|
|
|
|
|
|
|
keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
|
2009-01-25 09:11:24 +00:00
|
|
|
KASSERT(keg->uk_ipers != 0, ("keg_small_init: ipers is 0"));
|
2004-07-29 15:25:40 +00:00
|
|
|
memused = keg->uk_ipers * rsize + shsize;
|
|
|
|
wastedspace = UMA_SLAB_SIZE - memused;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2004-07-29 15:25:40 +00:00
|
|
|
/*
|
|
|
|
* We can't do OFFPAGE if we're internal or if we've been
|
|
|
|
* asked to not go to the VM for buckets. If we do this we
|
|
|
|
* may end up going to the VM (kmem_map) for slabs which we
|
|
|
|
* do not want to do if we're UMA_ZFLAG_CACHEONLY as a
|
|
|
|
* result of UMA_ZONE_VM, which clearly forbids it.
|
|
|
|
*/
|
|
|
|
if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
|
|
|
|
(keg->uk_flags & UMA_ZFLAG_CACHEONLY))
|
|
|
|
return;
|
|
|
|
|
|
|
|
if ((wastedspace >= UMA_MAX_WASTE) &&
|
|
|
|
(keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
|
|
|
|
keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
|
|
|
|
KASSERT(keg->uk_ipers <= 255,
|
2009-01-25 09:11:24 +00:00
|
|
|
("keg_small_init: keg->uk_ipers too high!"));
|
2004-07-29 15:25:40 +00:00
|
|
|
#ifdef UMA_DEBUG
|
|
|
|
printf("UMA decided we need offpage slab headers for "
|
2009-01-25 09:11:24 +00:00
|
|
|
"keg: %s, calculated wastedspace = %d, "
|
2004-07-29 15:25:40 +00:00
|
|
|
"maximum wasted space allowed = %d, "
|
|
|
|
"calculated ipers = %d, "
|
2009-01-25 09:11:24 +00:00
|
|
|
"new wasted space = %d\n", keg->uk_name, wastedspace,
|
2004-07-29 15:25:40 +00:00
|
|
|
UMA_MAX_WASTE, keg->uk_ipers,
|
|
|
|
UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
|
|
|
|
#endif
|
|
|
|
keg->uk_flags |= UMA_ZONE_OFFPAGE;
|
2009-01-25 09:11:24 +00:00
|
|
|
if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
|
2004-07-29 15:25:40 +00:00
|
|
|
keg->uk_flags |= UMA_ZONE_HASH;
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2009-01-25 09:11:24 +00:00
|
|
|
* Finish creating a large (> UMA_SLAB_SIZE) uma kegs. Just give in and do
|
2002-03-19 09:11:49 +00:00
|
|
|
* OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
|
|
|
|
* more complicated.
|
|
|
|
*
|
|
|
|
* Arguments
|
2009-01-25 09:11:24 +00:00
|
|
|
* keg The keg we should initialize
|
2002-03-19 09:11:49 +00:00
|
|
|
*
|
|
|
|
* Returns
|
|
|
|
* Nothing
|
|
|
|
*/
|
|
|
|
static void
|
2009-01-25 09:11:24 +00:00
|
|
|
keg_large_init(uma_keg_t keg)
|
2004-01-30 16:26:29 +00:00
|
|
|
{
|
2002-03-19 09:11:49 +00:00
|
|
|
int pages;
|
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
|
2009-01-25 09:11:24 +00:00
|
|
|
("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
|
2003-08-11 19:39:45 +00:00
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
pages = keg->uk_size / UMA_SLAB_SIZE;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
/* Account for remainder */
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
|
2002-03-19 09:11:49 +00:00
|
|
|
pages++;
|
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
keg->uk_ppera = pages;
|
|
|
|
keg->uk_ipers = 1;
|
2010-11-04 15:33:50 +00:00
|
|
|
keg->uk_rsize = keg->uk_size;
|
|
|
|
|
|
|
|
/* We can't do OFFPAGE if we're internal, bail out here. */
|
|
|
|
if (keg->uk_flags & UMA_ZFLAG_INTERNAL)
|
|
|
|
return;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
keg->uk_flags |= UMA_ZONE_OFFPAGE;
|
2009-01-25 09:11:24 +00:00
|
|
|
if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
keg->uk_flags |= UMA_ZONE_HASH;
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
static void
|
|
|
|
keg_cachespread_init(uma_keg_t keg)
|
|
|
|
{
|
|
|
|
int alignsize;
|
|
|
|
int trailer;
|
|
|
|
int pages;
|
|
|
|
int rsize;
|
|
|
|
|
|
|
|
alignsize = keg->uk_align + 1;
|
|
|
|
rsize = keg->uk_size;
|
|
|
|
/*
|
|
|
|
* We want one item to start on every align boundary in a page. To
|
|
|
|
* do this we will span pages. We will also extend the item by the
|
|
|
|
* size of align if it is an even multiple of align. Otherwise, it
|
|
|
|
* would fall on the same boundary every time.
|
|
|
|
*/
|
|
|
|
if (rsize & keg->uk_align)
|
|
|
|
rsize = (rsize & ~keg->uk_align) + alignsize;
|
|
|
|
if ((rsize & alignsize) == 0)
|
|
|
|
rsize += alignsize;
|
|
|
|
trailer = rsize - keg->uk_size;
|
|
|
|
pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
|
|
|
|
pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
|
|
|
|
keg->uk_rsize = rsize;
|
|
|
|
keg->uk_ppera = pages;
|
|
|
|
keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
|
|
|
|
keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
|
|
|
|
KASSERT(keg->uk_ipers <= uma_max_ipers,
|
|
|
|
("keg_small_init: keg->uk_ipers too high(%d) increase max_ipers",
|
|
|
|
keg->uk_ipers));
|
|
|
|
}
|
|
|
|
|
2004-01-30 16:26:29 +00:00
|
|
|
/*
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
* Keg header ctor. This initializes all fields, locks, etc. And inserts
|
|
|
|
* the keg onto the global keg list.
|
2002-03-19 09:11:49 +00:00
|
|
|
*
|
|
|
|
* Arguments/Returns follow uma_ctor specifications
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
* udata Actually uma_kctor_args
|
2002-03-19 09:11:49 +00:00
|
|
|
*/
|
2004-08-02 00:18:36 +00:00
|
|
|
static int
|
|
|
|
keg_ctor(void *mem, int size, void *udata, int flags)
|
2002-03-19 09:11:49 +00:00
|
|
|
{
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
struct uma_kctor_args *arg = udata;
|
|
|
|
uma_keg_t keg = mem;
|
|
|
|
uma_zone_t zone;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
bzero(keg, size);
|
|
|
|
keg->uk_size = arg->size;
|
|
|
|
keg->uk_init = arg->uminit;
|
|
|
|
keg->uk_fini = arg->fini;
|
|
|
|
keg->uk_align = arg->align;
|
|
|
|
keg->uk_free = 0;
|
|
|
|
keg->uk_pages = 0;
|
|
|
|
keg->uk_flags = arg->flags;
|
|
|
|
keg->uk_allocf = page_alloc;
|
|
|
|
keg->uk_freef = page_free;
|
|
|
|
keg->uk_recurse = 0;
|
|
|
|
keg->uk_slabzone = NULL;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
/*
|
|
|
|
* The master zone is passed to us at keg-creation time.
|
|
|
|
*/
|
|
|
|
zone = arg->zone;
|
2009-01-25 09:11:24 +00:00
|
|
|
keg->uk_name = zone->uz_name;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2002-06-17 22:02:41 +00:00
|
|
|
if (arg->flags & UMA_ZONE_VM)
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
|
|
|
|
|
|
|
|
if (arg->flags & UMA_ZONE_ZINIT)
|
|
|
|
keg->uk_init = zero_init;
|
2002-06-17 22:02:41 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
|
|
|
|
keg->uk_flags |= UMA_ZONE_VTOSLAB;
|
|
|
|
|
2003-08-11 19:39:45 +00:00
|
|
|
/*
|
2004-07-29 15:25:40 +00:00
|
|
|
* The +UMA_FRITM_SZ added to uk_size is to account for the
|
2009-01-25 09:11:24 +00:00
|
|
|
* linkage that is added to the size in keg_small_init(). If
|
2003-08-11 19:39:45 +00:00
|
|
|
* we don't account for this here then we may end up in
|
2009-01-25 09:11:24 +00:00
|
|
|
* keg_small_init() with a calculated 'ipers' of 0.
|
2003-08-11 19:39:45 +00:00
|
|
|
*/
|
2004-07-29 15:25:40 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_REFCNT) {
|
2009-01-25 09:11:24 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
|
|
|
|
keg_cachespread_init(keg);
|
|
|
|
else if ((keg->uk_size+UMA_FRITMREF_SZ) >
|
2004-07-29 15:25:40 +00:00
|
|
|
(UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
|
2009-01-25 09:11:24 +00:00
|
|
|
keg_large_init(keg);
|
2004-07-29 15:25:40 +00:00
|
|
|
else
|
2009-01-25 09:11:24 +00:00
|
|
|
keg_small_init(keg);
|
2004-07-29 15:25:40 +00:00
|
|
|
} else {
|
2009-01-25 09:11:24 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
|
|
|
|
keg_cachespread_init(keg);
|
|
|
|
else if ((keg->uk_size+UMA_FRITM_SZ) >
|
2004-07-29 15:25:40 +00:00
|
|
|
(UMA_SLAB_SIZE - sizeof(struct uma_slab)))
|
2009-01-25 09:11:24 +00:00
|
|
|
keg_large_init(keg);
|
2004-07-29 15:25:40 +00:00
|
|
|
else
|
2009-01-25 09:11:24 +00:00
|
|
|
keg_small_init(keg);
|
2004-07-29 15:25:40 +00:00
|
|
|
}
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
|
2004-07-29 15:25:40 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
|
|
|
|
if (keg->uk_flags & UMA_ZONE_REFCNT)
|
|
|
|
keg->uk_slabzone = slabrefzone;
|
|
|
|
else
|
|
|
|
keg->uk_slabzone = slabzone;
|
|
|
|
}
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
|
2003-09-21 07:39:16 +00:00
|
|
|
/*
|
|
|
|
* If we haven't booted yet we need allocations to go through the
|
|
|
|
* startup cache until the vm is ready.
|
|
|
|
*/
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (keg->uk_ppera == 1) {
|
2003-09-21 07:39:16 +00:00
|
|
|
#ifdef UMA_MD_SMALL_ALLOC
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
keg->uk_allocf = uma_small_alloc;
|
|
|
|
keg->uk_freef = uma_small_free;
|
2011-05-22 17:46:16 +00:00
|
|
|
|
1. Prior to r214782, UMA did not support multipage allocations before
uma_startup2() was called. Thus, setting the variable "booted" to true in
uma_startup() was ok on machines with UMA_MD_SMALL_ALLOC defined, because
any allocations made after uma_startup() but before uma_startup2() could be
satisfied by uma_small_alloc(). Now, however, some multipage allocations
are necessary before uma_startup2() just to allocate zone structures on
machines with a large number of processors. Thus, a Boolean can no longer
effectively describe the state of the UMA allocator. Instead, make "booted"
have three values to describe how far initialization has progressed. This
allows multipage allocations to continue using startup_alloc() until
uma_startup2(), but single-page allocations may begin using
uma_small_alloc() after uma_startup().
2. With the aforementioned change, only a modest increase in boot pages is
necessary to boot UMA on a large number of processors.
3. Retire UMA_MD_SMALL_ALLOC_NEEDS_VM. It has only been used between
r182028 and r204128.
Reviewed by: attilio [1], nwhitehorn [3]
Tested by: sbruno
2011-05-21 17:43:43 +00:00
|
|
|
if (booted < UMA_STARTUP)
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
keg->uk_allocf = startup_alloc;
|
2011-05-22 17:46:16 +00:00
|
|
|
#else
|
|
|
|
if (booted < UMA_STARTUP2)
|
|
|
|
keg->uk_allocf = startup_alloc;
|
|
|
|
#endif
|
1. Prior to r214782, UMA did not support multipage allocations before
uma_startup2() was called. Thus, setting the variable "booted" to true in
uma_startup() was ok on machines with UMA_MD_SMALL_ALLOC defined, because
any allocations made after uma_startup() but before uma_startup2() could be
satisfied by uma_small_alloc(). Now, however, some multipage allocations
are necessary before uma_startup2() just to allocate zone structures on
machines with a large number of processors. Thus, a Boolean can no longer
effectively describe the state of the UMA allocator. Instead, make "booted"
have three values to describe how far initialization has progressed. This
allows multipage allocations to continue using startup_alloc() until
uma_startup2(), but single-page allocations may begin using
uma_small_alloc() after uma_startup().
2. With the aforementioned change, only a modest increase in boot pages is
necessary to boot UMA on a large number of processors.
3. Retire UMA_MD_SMALL_ALLOC_NEEDS_VM. It has only been used between
r182028 and r204128.
Reviewed by: attilio [1], nwhitehorn [3]
Tested by: sbruno
2011-05-21 17:43:43 +00:00
|
|
|
} else if (booted < UMA_STARTUP2 &&
|
|
|
|
(keg->uk_flags & UMA_ZFLAG_INTERNAL))
|
2010-11-04 15:33:50 +00:00
|
|
|
keg->uk_allocf = startup_alloc;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
|
|
|
|
/*
|
2009-01-25 09:11:24 +00:00
|
|
|
* Initialize keg's lock (shared among zones).
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
*/
|
2002-04-29 23:45:41 +00:00
|
|
|
if (arg->flags & UMA_ZONE_MTXCLASS)
|
2009-01-25 09:11:24 +00:00
|
|
|
KEG_LOCK_INIT(keg, 1);
|
2002-04-29 23:45:41 +00:00
|
|
|
else
|
2009-01-25 09:11:24 +00:00
|
|
|
KEG_LOCK_INIT(keg, 0);
|
2002-04-29 23:45:41 +00:00
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
/*
|
|
|
|
* If we're putting the slab header in the actual page we need to
|
2004-01-30 16:26:29 +00:00
|
|
|
* figure out where in each page it goes. This calculates a right
|
2003-01-01 18:49:04 +00:00
|
|
|
* justified offset into the memory on an ALIGN_PTR boundary.
|
2002-03-19 09:11:49 +00:00
|
|
|
*/
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
|
2004-07-29 15:25:40 +00:00
|
|
|
u_int totsize;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
/* Size of the slab struct and free list */
|
2004-07-29 15:25:40 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_REFCNT)
|
|
|
|
totsize = sizeof(struct uma_slab_refcnt) +
|
|
|
|
keg->uk_ipers * UMA_FRITMREF_SZ;
|
|
|
|
else
|
|
|
|
totsize = sizeof(struct uma_slab) +
|
|
|
|
keg->uk_ipers * UMA_FRITM_SZ;
|
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
if (totsize & UMA_ALIGN_PTR)
|
|
|
|
totsize = (totsize & ~UMA_ALIGN_PTR) +
|
|
|
|
(UMA_ALIGN_PTR + 1);
|
2010-11-04 15:33:50 +00:00
|
|
|
keg->uk_pgoff = (UMA_SLAB_SIZE * keg->uk_ppera) - totsize;
|
2004-07-29 15:25:40 +00:00
|
|
|
|
|
|
|
if (keg->uk_flags & UMA_ZONE_REFCNT)
|
|
|
|
totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
|
|
|
|
+ keg->uk_ipers * UMA_FRITMREF_SZ;
|
|
|
|
else
|
|
|
|
totsize = keg->uk_pgoff + sizeof(struct uma_slab)
|
|
|
|
+ keg->uk_ipers * UMA_FRITM_SZ;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The only way the following is possible is if with our
|
|
|
|
* UMA_ALIGN_PTR adjustments we are now bigger than
|
|
|
|
* UMA_SLAB_SIZE. I haven't checked whether this is
|
|
|
|
* mathematically possible for all cases, so we make
|
|
|
|
* sure here anyway.
|
|
|
|
*/
|
2010-11-04 15:33:50 +00:00
|
|
|
if (totsize > UMA_SLAB_SIZE * keg->uk_ppera) {
|
2002-03-19 09:11:49 +00:00
|
|
|
printf("zone %s ipers %d rsize %d size %d\n",
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
zone->uz_name, keg->uk_ipers, keg->uk_rsize,
|
|
|
|
keg->uk_size);
|
2009-06-18 07:27:11 +00:00
|
|
|
panic("UMA slab won't fit.");
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_HASH)
|
|
|
|
hash_alloc(&keg->uk_hash);
|
2002-09-18 08:26:30 +00:00
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
#ifdef UMA_DEBUG
|
2009-01-25 09:11:24 +00:00
|
|
|
printf("UMA: %s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
|
|
|
|
zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
|
|
|
|
keg->uk_ipers, keg->uk_ppera,
|
|
|
|
(keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
|
2002-03-19 09:11:49 +00:00
|
|
|
#endif
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
|
|
|
|
LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
mtx_lock(&uma_mtx);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
|
2002-03-19 09:11:49 +00:00
|
|
|
mtx_unlock(&uma_mtx);
|
2004-08-02 00:18:36 +00:00
|
|
|
return (0);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Zone header ctor. This initializes all fields, locks, etc.
|
|
|
|
*
|
|
|
|
* Arguments/Returns follow uma_ctor specifications
|
|
|
|
* udata Actually uma_zctor_args
|
|
|
|
*/
|
2004-08-02 00:18:36 +00:00
|
|
|
static int
|
|
|
|
zone_ctor(void *mem, int size, void *udata, int flags)
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
{
|
|
|
|
struct uma_zctor_args *arg = udata;
|
|
|
|
uma_zone_t zone = mem;
|
|
|
|
uma_zone_t z;
|
|
|
|
uma_keg_t keg;
|
|
|
|
|
|
|
|
bzero(zone, size);
|
|
|
|
zone->uz_name = arg->name;
|
|
|
|
zone->uz_ctor = arg->ctor;
|
|
|
|
zone->uz_dtor = arg->dtor;
|
2009-01-25 09:11:24 +00:00
|
|
|
zone->uz_slab = zone_fetch_slab;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
zone->uz_init = NULL;
|
|
|
|
zone->uz_fini = NULL;
|
|
|
|
zone->uz_allocs = 0;
|
2005-07-14 16:17:21 +00:00
|
|
|
zone->uz_frees = 0;
|
2005-07-15 23:34:39 +00:00
|
|
|
zone->uz_fails = 0;
|
2010-06-15 19:28:37 +00:00
|
|
|
zone->uz_sleeps = 0;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
zone->uz_fills = zone->uz_count = 0;
|
2009-01-25 09:11:24 +00:00
|
|
|
zone->uz_flags = 0;
|
|
|
|
keg = arg->keg;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
|
|
|
|
if (arg->flags & UMA_ZONE_SECONDARY) {
|
|
|
|
KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
|
|
|
|
zone->uz_init = arg->uminit;
|
|
|
|
zone->uz_fini = arg->fini;
|
|
|
|
zone->uz_lock = &keg->uk_lock;
|
2009-01-25 09:11:24 +00:00
|
|
|
zone->uz_flags |= UMA_ZONE_SECONDARY;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
mtx_lock(&uma_mtx);
|
|
|
|
ZONE_LOCK(zone);
|
|
|
|
LIST_FOREACH(z, &keg->uk_zones, uz_link) {
|
|
|
|
if (LIST_NEXT(z, uz_link) == NULL) {
|
|
|
|
LIST_INSERT_AFTER(z, zone, uz_link);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
mtx_unlock(&uma_mtx);
|
2009-01-25 09:11:24 +00:00
|
|
|
} else if (keg == NULL) {
|
|
|
|
if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
|
|
|
|
arg->align, arg->flags)) == NULL)
|
2004-08-02 00:18:36 +00:00
|
|
|
return (ENOMEM);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
} else {
|
|
|
|
struct uma_kctor_args karg;
|
2004-08-02 00:18:36 +00:00
|
|
|
int error;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
|
|
|
|
/* We should only be here from uma_startup() */
|
|
|
|
karg.size = arg->size;
|
|
|
|
karg.uminit = arg->uminit;
|
|
|
|
karg.fini = arg->fini;
|
|
|
|
karg.align = arg->align;
|
|
|
|
karg.flags = arg->flags;
|
|
|
|
karg.zone = zone;
|
2004-08-02 00:18:36 +00:00
|
|
|
error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
|
|
|
|
flags);
|
|
|
|
if (error)
|
|
|
|
return (error);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
}
|
2009-01-25 09:11:24 +00:00
|
|
|
/*
|
|
|
|
* Link in the first keg.
|
|
|
|
*/
|
|
|
|
zone->uz_klink.kl_keg = keg;
|
|
|
|
LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
zone->uz_lock = &keg->uk_lock;
|
2009-01-25 09:11:24 +00:00
|
|
|
zone->uz_size = keg->uk_size;
|
|
|
|
zone->uz_flags |= (keg->uk_flags &
|
|
|
|
(UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Some internal zones don't have room allocated for the per cpu
|
|
|
|
* caches. If we're internal, bail out here.
|
|
|
|
*/
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
|
2009-01-25 09:11:24 +00:00
|
|
|
KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
("Secondary zone requested UMA_ZFLAG_INTERNAL"));
|
2004-08-02 00:18:36 +00:00
|
|
|
return (0);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
}
|
2002-03-19 09:11:49 +00:00
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
|
|
|
|
zone->uz_count = BUCKET_MAX;
|
|
|
|
else if (keg->uk_ipers <= BUCKET_MAX)
|
|
|
|
zone->uz_count = keg->uk_ipers;
|
2002-04-08 02:42:55 +00:00
|
|
|
else
|
2003-09-19 06:26:45 +00:00
|
|
|
zone->uz_count = BUCKET_MAX;
|
2004-08-02 00:18:36 +00:00
|
|
|
return (0);
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
|
2004-01-30 16:26:29 +00:00
|
|
|
/*
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
* Keg header dtor. This frees all data, destroys locks, frees the hash
|
|
|
|
* table and removes the keg from the global list.
|
2002-04-08 04:48:58 +00:00
|
|
|
*
|
|
|
|
* Arguments/Returns follow uma_dtor specifications
|
|
|
|
* udata unused
|
|
|
|
*/
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
static void
|
|
|
|
keg_dtor(void *arg, int size, void *udata)
|
|
|
|
{
|
|
|
|
uma_keg_t keg;
|
2002-04-08 04:48:58 +00:00
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
keg = (uma_keg_t)arg;
|
2009-01-25 09:11:24 +00:00
|
|
|
KEG_LOCK(keg);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (keg->uk_free != 0) {
|
|
|
|
printf("Freed UMA keg was not empty (%d items). "
|
|
|
|
" Lost %d pages of memory.\n",
|
|
|
|
keg->uk_free, keg->uk_pages);
|
|
|
|
}
|
2009-01-25 09:11:24 +00:00
|
|
|
KEG_UNLOCK(keg);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
hash_free(&keg->uk_hash);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
KEG_LOCK_FINI(keg);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Zone header dtor.
|
|
|
|
*
|
|
|
|
* Arguments/Returns follow uma_dtor specifications
|
|
|
|
* udata unused
|
|
|
|
*/
|
2002-04-08 04:48:58 +00:00
|
|
|
static void
|
|
|
|
zone_dtor(void *arg, int size, void *udata)
|
|
|
|
{
|
2009-01-25 09:11:24 +00:00
|
|
|
uma_klink_t klink;
|
2002-04-08 04:48:58 +00:00
|
|
|
uma_zone_t zone;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
uma_keg_t keg;
|
2002-04-08 04:48:58 +00:00
|
|
|
|
|
|
|
zone = (uma_zone_t)arg;
|
2009-01-25 09:11:24 +00:00
|
|
|
keg = zone_first_keg(zone);
|
2003-09-19 23:27:46 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
|
2003-09-19 23:27:46 +00:00
|
|
|
cache_drain(zone);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
|
2002-07-05 21:39:52 +00:00
|
|
|
mtx_lock(&uma_mtx);
|
2009-01-25 09:11:24 +00:00
|
|
|
LIST_REMOVE(zone, uz_link);
|
|
|
|
mtx_unlock(&uma_mtx);
|
|
|
|
/*
|
|
|
|
* XXX there are some races here where
|
|
|
|
* the zone can be drained but zone lock
|
|
|
|
* released and then refilled before we
|
|
|
|
* remove it... we dont care for now
|
|
|
|
*/
|
|
|
|
zone_drain_wait(zone, M_WAITOK);
|
|
|
|
/*
|
|
|
|
* Unlink all of our kegs.
|
|
|
|
*/
|
|
|
|
while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
|
|
|
|
klink->kl_keg = NULL;
|
|
|
|
LIST_REMOVE(klink, kl_link);
|
|
|
|
if (klink == &zone->uz_klink)
|
|
|
|
continue;
|
|
|
|
free(klink, M_TEMP);
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* We only destroy kegs from non secondary zones.
|
|
|
|
*/
|
|
|
|
if ((zone->uz_flags & UMA_ZONE_SECONDARY) == 0) {
|
|
|
|
mtx_lock(&uma_mtx);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
LIST_REMOVE(keg, uk_link);
|
|
|
|
mtx_unlock(&uma_mtx);
|
2009-01-25 09:11:24 +00:00
|
|
|
zone_free_item(kegs, keg, NULL, SKIP_NONE,
|
2005-07-20 18:47:42 +00:00
|
|
|
ZFREE_STATFREE);
|
2003-11-30 08:04:01 +00:00
|
|
|
}
|
2002-04-08 04:48:58 +00:00
|
|
|
}
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
/*
|
|
|
|
* Traverses every zone in the system and calls a callback
|
|
|
|
*
|
|
|
|
* Arguments:
|
|
|
|
* zfunc A pointer to a function which accepts a zone
|
|
|
|
* as an argument.
|
2004-01-30 16:26:29 +00:00
|
|
|
*
|
2002-03-19 09:11:49 +00:00
|
|
|
* Returns:
|
|
|
|
* Nothing
|
|
|
|
*/
|
2004-01-30 16:26:29 +00:00
|
|
|
static void
|
2002-03-19 09:11:49 +00:00
|
|
|
zone_foreach(void (*zfunc)(uma_zone_t))
|
|
|
|
{
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
uma_keg_t keg;
|
2002-03-19 09:11:49 +00:00
|
|
|
uma_zone_t zone;
|
|
|
|
|
|
|
|
mtx_lock(&uma_mtx);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
LIST_FOREACH(keg, &uma_kegs, uk_link) {
|
|
|
|
LIST_FOREACH(zone, &keg->uk_zones, uz_link)
|
|
|
|
zfunc(zone);
|
|
|
|
}
|
2002-03-19 09:11:49 +00:00
|
|
|
mtx_unlock(&uma_mtx);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Public functions */
|
|
|
|
/* See uma.h */
|
|
|
|
void
|
2005-10-08 21:03:54 +00:00
|
|
|
uma_startup(void *bootmem, int boot_pages)
|
2002-03-19 09:11:49 +00:00
|
|
|
{
|
|
|
|
struct uma_zctor_args args;
|
|
|
|
uma_slab_t slab;
|
2004-07-29 15:25:40 +00:00
|
|
|
u_int slabsize;
|
|
|
|
u_int objsize, totsize, wsize;
|
2002-03-19 09:11:49 +00:00
|
|
|
int i;
|
|
|
|
|
|
|
|
#ifdef UMA_DEBUG
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
printf("Creating uma keg headers zone and keg.\n");
|
2002-03-19 09:11:49 +00:00
|
|
|
#endif
|
2005-09-09 06:03:08 +00:00
|
|
|
mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
|
2004-07-29 15:25:40 +00:00
|
|
|
/*
|
|
|
|
* Figure out the maximum number of items-per-slab we'll have if
|
|
|
|
* we're using the OFFPAGE slab header to track free items, given
|
|
|
|
* all possible object sizes and the maximum desired wastage
|
|
|
|
* (UMA_MAX_WASTE).
|
|
|
|
*
|
|
|
|
* We iterate until we find an object size for
|
2009-01-25 09:11:24 +00:00
|
|
|
* which the calculated wastage in keg_small_init() will be
|
2004-07-29 15:25:40 +00:00
|
|
|
* enough to warrant OFFPAGE. Since wastedspace versus objsize
|
|
|
|
* is an overall increasing see-saw function, we find the smallest
|
|
|
|
* objsize such that the wastage is always acceptable for objects
|
|
|
|
* with that objsize or smaller. Since a smaller objsize always
|
|
|
|
* generates a larger possible uma_max_ipers, we use this computed
|
|
|
|
* objsize to calculate the largest ipers possible. Since the
|
|
|
|
* ipers calculated for OFFPAGE slab headers is always larger than
|
2009-01-25 09:11:24 +00:00
|
|
|
* the ipers initially calculated in keg_small_init(), we use
|
2004-07-29 15:25:40 +00:00
|
|
|
* the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
|
|
|
|
* obtain the maximum ipers possible for offpage slab headers.
|
|
|
|
*
|
|
|
|
* It should be noted that ipers versus objsize is an inversly
|
|
|
|
* proportional function which drops off rather quickly so as
|
|
|
|
* long as our UMA_MAX_WASTE is such that the objsize we calculate
|
|
|
|
* falls into the portion of the inverse relation AFTER the steep
|
|
|
|
* falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
|
|
|
|
*
|
|
|
|
* Note that we have 8-bits (1 byte) to use as a freelist index
|
|
|
|
* inside the actual slab header itself and this is enough to
|
|
|
|
* accomodate us. In the worst case, a UMA_SMALLEST_UNIT sized
|
|
|
|
* object with offpage slab header would have ipers =
|
|
|
|
* UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
|
|
|
|
* 1 greater than what our byte-integer freelist index can
|
|
|
|
* accomodate, but we know that this situation never occurs as
|
|
|
|
* for UMA_SMALLEST_UNIT-sized objects, we will never calculate
|
|
|
|
* that we need to go to offpage slab headers. Or, if we do,
|
|
|
|
* then we trap that condition below and panic in the INVARIANTS case.
|
|
|
|
*/
|
|
|
|
wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
|
|
|
|
totsize = wsize;
|
|
|
|
objsize = UMA_SMALLEST_UNIT;
|
|
|
|
while (totsize >= wsize) {
|
|
|
|
totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
|
|
|
|
(objsize + UMA_FRITM_SZ);
|
|
|
|
totsize *= (UMA_FRITM_SZ + objsize);
|
|
|
|
objsize++;
|
|
|
|
}
|
|
|
|
if (objsize > UMA_SMALLEST_UNIT)
|
|
|
|
objsize--;
|
2009-01-25 09:11:24 +00:00
|
|
|
uma_max_ipers = MAX(UMA_SLAB_SIZE / objsize, 64);
|
2004-07-29 15:25:40 +00:00
|
|
|
|
|
|
|
wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
|
|
|
|
totsize = wsize;
|
|
|
|
objsize = UMA_SMALLEST_UNIT;
|
|
|
|
while (totsize >= wsize) {
|
|
|
|
totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
|
|
|
|
(objsize + UMA_FRITMREF_SZ);
|
|
|
|
totsize *= (UMA_FRITMREF_SZ + objsize);
|
|
|
|
objsize++;
|
|
|
|
}
|
|
|
|
if (objsize > UMA_SMALLEST_UNIT)
|
|
|
|
objsize--;
|
2009-01-25 09:11:24 +00:00
|
|
|
uma_max_ipers_ref = MAX(UMA_SLAB_SIZE / objsize, 64);
|
2004-07-29 15:25:40 +00:00
|
|
|
|
|
|
|
KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
|
|
|
|
("uma_startup: calculated uma_max_ipers values too large!"));
|
|
|
|
|
|
|
|
#ifdef UMA_DEBUG
|
|
|
|
printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
|
|
|
|
printf("Calculated uma_max_ipers_slab (for OFFPAGE) is %d\n",
|
|
|
|
uma_max_ipers_ref);
|
|
|
|
#endif
|
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
/* "manually" create the initial zone */
|
|
|
|
args.name = "UMA Kegs";
|
|
|
|
args.size = sizeof(struct uma_keg);
|
|
|
|
args.ctor = keg_ctor;
|
|
|
|
args.dtor = keg_dtor;
|
2002-03-19 09:11:49 +00:00
|
|
|
args.uminit = zero_init;
|
|
|
|
args.fini = NULL;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
args.keg = &masterkeg;
|
2002-03-19 09:11:49 +00:00
|
|
|
args.align = 32 - 1;
|
2003-09-19 08:37:44 +00:00
|
|
|
args.flags = UMA_ZFLAG_INTERNAL;
|
2002-03-19 09:11:49 +00:00
|
|
|
/* The initial zone has no Per cpu queues so it's smaller */
|
2004-08-02 00:18:36 +00:00
|
|
|
zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
#ifdef UMA_DEBUG
|
|
|
|
printf("Filling boot free list.\n");
|
|
|
|
#endif
|
2005-10-08 21:03:54 +00:00
|
|
|
for (i = 0; i < boot_pages; i++) {
|
2002-03-19 09:11:49 +00:00
|
|
|
slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
|
|
|
|
slab->us_data = (u_int8_t *)slab;
|
|
|
|
slab->us_flags = UMA_SLAB_BOOT;
|
|
|
|
LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
|
|
|
|
}
|
2005-09-09 06:03:08 +00:00
|
|
|
mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
#ifdef UMA_DEBUG
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
printf("Creating uma zone headers zone and keg.\n");
|
|
|
|
#endif
|
|
|
|
args.name = "UMA Zones";
|
|
|
|
args.size = sizeof(struct uma_zone) +
|
|
|
|
(sizeof(struct uma_cache) * (mp_maxid + 1));
|
|
|
|
args.ctor = zone_ctor;
|
|
|
|
args.dtor = zone_dtor;
|
|
|
|
args.uminit = zero_init;
|
|
|
|
args.fini = NULL;
|
|
|
|
args.keg = NULL;
|
|
|
|
args.align = 32 - 1;
|
|
|
|
args.flags = UMA_ZFLAG_INTERNAL;
|
|
|
|
/* The initial zone has no Per cpu queues so it's smaller */
|
2004-08-02 00:18:36 +00:00
|
|
|
zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
|
|
|
|
#ifdef UMA_DEBUG
|
|
|
|
printf("Initializing pcpu cache locks.\n");
|
|
|
|
#endif
|
|
|
|
#ifdef UMA_DEBUG
|
|
|
|
printf("Creating slab and hash zones.\n");
|
2002-03-19 09:11:49 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This is the max number of free list items we'll have with
|
|
|
|
* offpage slabs.
|
|
|
|
*/
|
2004-07-29 15:25:40 +00:00
|
|
|
slabsize = uma_max_ipers * UMA_FRITM_SZ;
|
2002-03-19 09:11:49 +00:00
|
|
|
slabsize += sizeof(struct uma_slab);
|
|
|
|
|
|
|
|
/* Now make a zone for slab headers */
|
|
|
|
slabzone = uma_zcreate("UMA Slabs",
|
|
|
|
slabsize,
|
|
|
|
NULL, NULL, NULL, NULL,
|
2003-09-19 08:37:44 +00:00
|
|
|
UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
|
2002-03-19 09:11:49 +00:00
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
/*
|
|
|
|
* We also create a zone for the bigger slabs with reference
|
|
|
|
* counts in them, to accomodate UMA_ZONE_REFCNT zones.
|
|
|
|
*/
|
2004-07-29 15:25:40 +00:00
|
|
|
slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
slabsize += sizeof(struct uma_slab_refcnt);
|
|
|
|
slabrefzone = uma_zcreate("UMA RCntSlabs",
|
|
|
|
slabsize,
|
|
|
|
NULL, NULL, NULL, NULL,
|
Make the slabrefzone, the zone from which we allocated slabs with
internal reference counters, UMA_ZONE_NOFREE. This way, those slabs
(with their ref counts) will be effectively type-stable, then using
a trick like this on the refcount is no longer dangerous:
MEXT_REM_REF(m);
if (atomic_cmpset_int(m->m_ext.ref_cnt, 0, 1)) {
if (m->m_ext.ext_type == EXT_PACKET) {
uma_zfree(zone_pack, m);
return;
} else if (m->m_ext.ext_type == EXT_CLUSTER) {
uma_zfree(zone_clust, m->m_ext.ext_buf);
m->m_ext.ext_buf = NULL;
} else {
(*(m->m_ext.ext_free))(m->m_ext.ext_buf,
m->m_ext.ext_args);
if (m->m_ext.ext_type != EXT_EXTREF)
free(m->m_ext.ref_cnt, M_MBUF);
}
}
uma_zfree(zone_mbuf, m);
Previously, a second thread hitting the above cmpset might
actually read the refcnt AFTER it has already been freed. A very
rare occurance. Now we'll know that it won't be freed, though.
Spotted by: julian, pjd
2004-06-09 19:18:50 +00:00
|
|
|
UMA_ALIGN_PTR,
|
2004-06-09 20:50:08 +00:00
|
|
|
UMA_ZFLAG_INTERNAL);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
hashzone = uma_zcreate("UMA Hash",
|
|
|
|
sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
|
|
|
|
NULL, NULL, NULL, NULL,
|
2003-09-19 08:37:44 +00:00
|
|
|
UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2003-09-19 06:26:45 +00:00
|
|
|
bucket_init();
|
2002-03-19 09:11:49 +00:00
|
|
|
|
1. Prior to r214782, UMA did not support multipage allocations before
uma_startup2() was called. Thus, setting the variable "booted" to true in
uma_startup() was ok on machines with UMA_MD_SMALL_ALLOC defined, because
any allocations made after uma_startup() but before uma_startup2() could be
satisfied by uma_small_alloc(). Now, however, some multipage allocations
are necessary before uma_startup2() just to allocate zone structures on
machines with a large number of processors. Thus, a Boolean can no longer
effectively describe the state of the UMA allocator. Instead, make "booted"
have three values to describe how far initialization has progressed. This
allows multipage allocations to continue using startup_alloc() until
uma_startup2(), but single-page allocations may begin using
uma_small_alloc() after uma_startup().
2. With the aforementioned change, only a modest increase in boot pages is
necessary to boot UMA on a large number of processors.
3. Retire UMA_MD_SMALL_ALLOC_NEEDS_VM. It has only been used between
r182028 and r204128.
Reviewed by: attilio [1], nwhitehorn [3]
Tested by: sbruno
2011-05-21 17:43:43 +00:00
|
|
|
booted = UMA_STARTUP;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
#ifdef UMA_DEBUG
|
|
|
|
printf("UMA startup complete.\n");
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
/* see uma.h */
|
|
|
|
void
|
2002-09-18 08:26:30 +00:00
|
|
|
uma_startup2(void)
|
2002-03-19 09:11:49 +00:00
|
|
|
{
|
1. Prior to r214782, UMA did not support multipage allocations before
uma_startup2() was called. Thus, setting the variable "booted" to true in
uma_startup() was ok on machines with UMA_MD_SMALL_ALLOC defined, because
any allocations made after uma_startup() but before uma_startup2() could be
satisfied by uma_small_alloc(). Now, however, some multipage allocations
are necessary before uma_startup2() just to allocate zone structures on
machines with a large number of processors. Thus, a Boolean can no longer
effectively describe the state of the UMA allocator. Instead, make "booted"
have three values to describe how far initialization has progressed. This
allows multipage allocations to continue using startup_alloc() until
uma_startup2(), but single-page allocations may begin using
uma_small_alloc() after uma_startup().
2. With the aforementioned change, only a modest increase in boot pages is
necessary to boot UMA on a large number of processors.
3. Retire UMA_MD_SMALL_ALLOC_NEEDS_VM. It has only been used between
r182028 and r204128.
Reviewed by: attilio [1], nwhitehorn [3]
Tested by: sbruno
2011-05-21 17:43:43 +00:00
|
|
|
booted = UMA_STARTUP2;
|
2002-04-08 06:20:34 +00:00
|
|
|
bucket_enable();
|
2002-03-19 09:11:49 +00:00
|
|
|
#ifdef UMA_DEBUG
|
|
|
|
printf("UMA startup2 complete.\n");
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize our callout handle
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void
|
|
|
|
uma_startup3(void)
|
|
|
|
{
|
|
|
|
#ifdef UMA_DEBUG
|
|
|
|
printf("Starting callout.\n");
|
|
|
|
#endif
|
2004-03-07 07:00:46 +00:00
|
|
|
callout_init(&uma_callout, CALLOUT_MPSAFE);
|
2003-09-19 23:27:46 +00:00
|
|
|
callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
|
2002-03-19 09:11:49 +00:00
|
|
|
#ifdef UMA_DEBUG
|
|
|
|
printf("UMA startup3 complete.\n");
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
static uma_keg_t
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
|
2005-07-16 02:23:41 +00:00
|
|
|
int align, u_int32_t flags)
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
{
|
|
|
|
struct uma_kctor_args args;
|
|
|
|
|
|
|
|
args.size = size;
|
|
|
|
args.uminit = uminit;
|
|
|
|
args.fini = fini;
|
2007-02-11 20:13:52 +00:00
|
|
|
args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
args.flags = flags;
|
|
|
|
args.zone = zone;
|
2009-01-25 09:11:24 +00:00
|
|
|
return (zone_alloc_item(kegs, &args, M_WAITOK));
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
}
|
|
|
|
|
2007-02-11 20:13:52 +00:00
|
|
|
/* See uma.h */
|
|
|
|
void
|
|
|
|
uma_set_align(int align)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (align != UMA_ALIGN_CACHE)
|
|
|
|
uma_align_cache = align;
|
|
|
|
}
|
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
/* See uma.h */
|
2004-01-30 16:26:29 +00:00
|
|
|
uma_zone_t
|
2002-05-02 07:36:30 +00:00
|
|
|
uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
|
2005-07-16 02:23:41 +00:00
|
|
|
uma_init uminit, uma_fini fini, int align, u_int32_t flags)
|
2004-01-30 16:26:29 +00:00
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
{
|
|
|
|
struct uma_zctor_args args;
|
|
|
|
|
|
|
|
/* This stuff is essential for the zone ctor */
|
|
|
|
args.name = name;
|
|
|
|
args.size = size;
|
|
|
|
args.ctor = ctor;
|
|
|
|
args.dtor = dtor;
|
|
|
|
args.uminit = uminit;
|
|
|
|
args.fini = fini;
|
|
|
|
args.align = align;
|
|
|
|
args.flags = flags;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
args.keg = NULL;
|
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
return (zone_alloc_item(zones, &args, M_WAITOK));
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* See uma.h */
|
|
|
|
uma_zone_t
|
|
|
|
uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
|
|
|
|
uma_init zinit, uma_fini zfini, uma_zone_t master)
|
|
|
|
{
|
|
|
|
struct uma_zctor_args args;
|
2009-01-25 09:11:24 +00:00
|
|
|
uma_keg_t keg;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
keg = zone_first_keg(master);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
args.name = name;
|
2009-01-25 09:11:24 +00:00
|
|
|
args.size = keg->uk_size;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
args.ctor = ctor;
|
|
|
|
args.dtor = dtor;
|
|
|
|
args.uminit = zinit;
|
|
|
|
args.fini = zfini;
|
2009-01-25 09:11:24 +00:00
|
|
|
args.align = keg->uk_align;
|
|
|
|
args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
|
|
|
|
args.keg = keg;
|
|
|
|
|
|
|
|
/* XXX Attaches only one keg of potentially many. */
|
|
|
|
return (zone_alloc_item(zones, &args, M_WAITOK));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
zone_lock_pair(uma_zone_t a, uma_zone_t b)
|
|
|
|
{
|
|
|
|
if (a < b) {
|
|
|
|
ZONE_LOCK(a);
|
|
|
|
mtx_lock_flags(b->uz_lock, MTX_DUPOK);
|
|
|
|
} else {
|
|
|
|
ZONE_LOCK(b);
|
|
|
|
mtx_lock_flags(a->uz_lock, MTX_DUPOK);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
zone_unlock_pair(uma_zone_t a, uma_zone_t b)
|
|
|
|
{
|
|
|
|
|
|
|
|
ZONE_UNLOCK(a);
|
|
|
|
ZONE_UNLOCK(b);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
|
|
|
|
{
|
|
|
|
uma_klink_t klink;
|
|
|
|
uma_klink_t kl;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
error = 0;
|
|
|
|
klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
|
|
|
|
|
|
|
|
zone_lock_pair(zone, master);
|
|
|
|
/*
|
|
|
|
* zone must use vtoslab() to resolve objects and must already be
|
|
|
|
* a secondary.
|
|
|
|
*/
|
|
|
|
if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
|
|
|
|
!= (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
|
|
|
|
error = EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* The new master must also use vtoslab().
|
|
|
|
*/
|
|
|
|
if ((zone->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
|
|
|
|
error = EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Both must either be refcnt, or not be refcnt.
|
|
|
|
*/
|
|
|
|
if ((zone->uz_flags & UMA_ZONE_REFCNT) !=
|
|
|
|
(master->uz_flags & UMA_ZONE_REFCNT)) {
|
|
|
|
error = EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* The underlying object must be the same size. rsize
|
|
|
|
* may be different.
|
|
|
|
*/
|
|
|
|
if (master->uz_size != zone->uz_size) {
|
|
|
|
error = E2BIG;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Put it at the end of the list.
|
|
|
|
*/
|
|
|
|
klink->kl_keg = zone_first_keg(master);
|
|
|
|
LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
|
|
|
|
if (LIST_NEXT(kl, kl_link) == NULL) {
|
|
|
|
LIST_INSERT_AFTER(kl, klink, kl_link);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
klink = NULL;
|
|
|
|
zone->uz_flags |= UMA_ZFLAG_MULTI;
|
|
|
|
zone->uz_slab = zone_fetch_slab_multi;
|
|
|
|
|
|
|
|
out:
|
|
|
|
zone_unlock_pair(zone, master);
|
|
|
|
if (klink != NULL)
|
|
|
|
free(klink, M_TEMP);
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
return (error);
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
|
2002-04-08 04:48:58 +00:00
|
|
|
/* See uma.h */
|
|
|
|
void
|
|
|
|
uma_zdestroy(uma_zone_t zone)
|
|
|
|
{
|
2005-07-20 18:47:42 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
zone_free_item(zones, zone, NULL, SKIP_NONE, ZFREE_STATFREE);
|
2002-04-08 04:48:58 +00:00
|
|
|
}
|
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
/* See uma.h */
|
|
|
|
void *
|
2002-04-30 04:26:34 +00:00
|
|
|
uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
|
2002-03-19 09:11:49 +00:00
|
|
|
{
|
|
|
|
void *item;
|
|
|
|
uma_cache_t cache;
|
|
|
|
uma_bucket_t bucket;
|
|
|
|
int cpu;
|
|
|
|
|
|
|
|
/* This is the fast path allocation */
|
|
|
|
#ifdef UMA_DEBUG_ALLOC_1
|
|
|
|
printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
|
|
|
|
#endif
|
2004-08-06 21:52:38 +00:00
|
|
|
CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
|
|
|
|
zone->uz_name, flags);
|
2002-04-08 02:42:55 +00:00
|
|
|
|
2007-01-10 21:04:43 +00:00
|
|
|
if (flags & M_WAITOK) {
|
|
|
|
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
|
|
|
|
"uma_zalloc_arg: zone \"%s\"", zone->uz_name);
|
2002-05-20 17:54:48 +00:00
|
|
|
}
|
2011-10-12 18:08:28 +00:00
|
|
|
#ifdef DEBUG_MEMGUARD
|
|
|
|
if (memguard_cmp_zone(zone)) {
|
|
|
|
item = memguard_alloc(zone->uz_size, flags);
|
|
|
|
if (item != NULL) {
|
|
|
|
/*
|
|
|
|
* Avoid conflict with the use-after-free
|
|
|
|
* protecting infrastructure from INVARIANTS.
|
|
|
|
*/
|
|
|
|
if (zone->uz_init != NULL &&
|
|
|
|
zone->uz_init != mtrash_init &&
|
|
|
|
zone->uz_init(item, zone->uz_size, flags) != 0)
|
|
|
|
return (NULL);
|
|
|
|
if (zone->uz_ctor != NULL &&
|
|
|
|
zone->uz_ctor != mtrash_ctor &&
|
|
|
|
zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
|
|
|
|
zone->uz_fini(item, zone->uz_size);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
return (item);
|
|
|
|
}
|
|
|
|
/* This is unfortunate but should not be fatal. */
|
|
|
|
}
|
|
|
|
#endif
|
Modify UMA to use critical sections to protect per-CPU caches, rather than
mutexes, which offers lower overhead on both UP and SMP. When allocating
from or freeing to the per-cpu cache, without INVARIANTS enabled, we now
no longer perform any mutex operations, which offers a 1%-3% performance
improvement in a variety of micro-benchmarks. We rely on critical
sections to prevent (a) preemption resulting in reentrant access to UMA on
a single CPU, and (b) migration of the thread during access. In the event
we need to go back to the zone for a new bucket, we release the critical
section to acquire the global zone mutex, and must re-acquire the critical
section and re-evaluate which cache we are accessing in case migration has
occured, or circumstances have changed in the current cache.
Per-CPU cache statistics are now gathered lock-free by the sysctl, which
can result in small races in statistics reporting for caches.
Reviewed by: bmilekic, jeff (somewhat)
Tested by: rwatson, kris, gnn, scottl, mike at sentex dot net, others
2005-04-29 18:56:36 +00:00
|
|
|
/*
|
|
|
|
* If possible, allocate from the per-CPU cache. There are two
|
|
|
|
* requirements for safe access to the per-CPU cache: (1) the thread
|
|
|
|
* accessing the cache must not be preempted or yield during access,
|
|
|
|
* and (2) the thread must not migrate CPUs without switching which
|
|
|
|
* cache it accesses. We rely on a critical section to prevent
|
|
|
|
* preemption and migration. We release the critical section in
|
|
|
|
* order to acquire the zone mutex if we are unable to allocate from
|
|
|
|
* the current cache; when we re-acquire the critical section, we
|
|
|
|
* must detect and handle migration if it has occurred.
|
|
|
|
*/
|
2002-04-08 02:42:55 +00:00
|
|
|
zalloc_restart:
|
Modify UMA to use critical sections to protect per-CPU caches, rather than
mutexes, which offers lower overhead on both UP and SMP. When allocating
from or freeing to the per-cpu cache, without INVARIANTS enabled, we now
no longer perform any mutex operations, which offers a 1%-3% performance
improvement in a variety of micro-benchmarks. We rely on critical
sections to prevent (a) preemption resulting in reentrant access to UMA on
a single CPU, and (b) migration of the thread during access. In the event
we need to go back to the zone for a new bucket, we release the critical
section to acquire the global zone mutex, and must re-acquire the critical
section and re-evaluate which cache we are accessing in case migration has
occured, or circumstances have changed in the current cache.
Per-CPU cache statistics are now gathered lock-free by the sysctl, which
can result in small races in statistics reporting for caches.
Reviewed by: bmilekic, jeff (somewhat)
Tested by: rwatson, kris, gnn, scottl, mike at sentex dot net, others
2005-04-29 18:56:36 +00:00
|
|
|
critical_enter();
|
|
|
|
cpu = curcpu;
|
2002-03-19 09:11:49 +00:00
|
|
|
cache = &zone->uz_cpu[cpu];
|
|
|
|
|
|
|
|
zalloc_start:
|
|
|
|
bucket = cache->uc_allocbucket;
|
|
|
|
|
|
|
|
if (bucket) {
|
2003-09-19 06:26:45 +00:00
|
|
|
if (bucket->ub_cnt > 0) {
|
|
|
|
bucket->ub_cnt--;
|
|
|
|
item = bucket->ub_bucket[bucket->ub_cnt];
|
2002-03-19 09:11:49 +00:00
|
|
|
#ifdef INVARIANTS
|
2003-09-19 06:26:45 +00:00
|
|
|
bucket->ub_bucket[bucket->ub_cnt] = NULL;
|
2002-03-19 09:11:49 +00:00
|
|
|
#endif
|
|
|
|
KASSERT(item != NULL,
|
|
|
|
("uma_zalloc: Bucket pointer mangled."));
|
|
|
|
cache->uc_allocs++;
|
Modify UMA to use critical sections to protect per-CPU caches, rather than
mutexes, which offers lower overhead on both UP and SMP. When allocating
from or freeing to the per-cpu cache, without INVARIANTS enabled, we now
no longer perform any mutex operations, which offers a 1%-3% performance
improvement in a variety of micro-benchmarks. We rely on critical
sections to prevent (a) preemption resulting in reentrant access to UMA on
a single CPU, and (b) migration of the thread during access. In the event
we need to go back to the zone for a new bucket, we release the critical
section to acquire the global zone mutex, and must re-acquire the critical
section and re-evaluate which cache we are accessing in case migration has
occured, or circumstances have changed in the current cache.
Per-CPU cache statistics are now gathered lock-free by the sysctl, which
can result in small races in statistics reporting for caches.
Reviewed by: bmilekic, jeff (somewhat)
Tested by: rwatson, kris, gnn, scottl, mike at sentex dot net, others
2005-04-29 18:56:36 +00:00
|
|
|
critical_exit();
|
2002-05-02 02:08:48 +00:00
|
|
|
#ifdef INVARIANTS
|
2002-11-11 11:50:03 +00:00
|
|
|
ZONE_LOCK(zone);
|
2002-05-02 02:08:48 +00:00
|
|
|
uma_dbg_alloc(zone, NULL, item);
|
2002-11-11 11:50:03 +00:00
|
|
|
ZONE_UNLOCK(zone);
|
2002-05-02 02:08:48 +00:00
|
|
|
#endif
|
2004-08-02 00:18:36 +00:00
|
|
|
if (zone->uz_ctor != NULL) {
|
2009-01-25 09:11:24 +00:00
|
|
|
if (zone->uz_ctor(item, zone->uz_size,
|
2004-08-02 00:18:36 +00:00
|
|
|
udata, flags) != 0) {
|
2009-01-25 09:11:24 +00:00
|
|
|
zone_free_item(zone, item, udata,
|
2005-07-20 18:47:42 +00:00
|
|
|
SKIP_DTOR, ZFREE_STATFAIL |
|
|
|
|
ZFREE_STATFREE);
|
2004-08-02 00:18:36 +00:00
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
}
|
2002-04-30 04:26:34 +00:00
|
|
|
if (flags & M_ZERO)
|
2009-01-25 09:11:24 +00:00
|
|
|
bzero(item, zone->uz_size);
|
2002-03-19 09:11:49 +00:00
|
|
|
return (item);
|
|
|
|
} else if (cache->uc_freebucket) {
|
|
|
|
/*
|
|
|
|
* We have run out of items in our allocbucket.
|
|
|
|
* See if we can switch with our free bucket.
|
|
|
|
*/
|
2003-09-19 06:26:45 +00:00
|
|
|
if (cache->uc_freebucket->ub_cnt > 0) {
|
2002-03-19 09:11:49 +00:00
|
|
|
#ifdef UMA_DEBUG_ALLOC
|
2003-09-19 22:31:45 +00:00
|
|
|
printf("uma_zalloc: Swapping empty with"
|
|
|
|
" alloc.\n");
|
2002-03-19 09:11:49 +00:00
|
|
|
#endif
|
2003-09-19 06:41:06 +00:00
|
|
|
bucket = cache->uc_freebucket;
|
2002-03-19 09:11:49 +00:00
|
|
|
cache->uc_freebucket = cache->uc_allocbucket;
|
2003-09-19 06:41:06 +00:00
|
|
|
cache->uc_allocbucket = bucket;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
goto zalloc_start;
|
|
|
|
}
|
|
|
|
}
|
2002-04-08 02:42:55 +00:00
|
|
|
}
|
Modify UMA to use critical sections to protect per-CPU caches, rather than
mutexes, which offers lower overhead on both UP and SMP. When allocating
from or freeing to the per-cpu cache, without INVARIANTS enabled, we now
no longer perform any mutex operations, which offers a 1%-3% performance
improvement in a variety of micro-benchmarks. We rely on critical
sections to prevent (a) preemption resulting in reentrant access to UMA on
a single CPU, and (b) migration of the thread during access. In the event
we need to go back to the zone for a new bucket, we release the critical
section to acquire the global zone mutex, and must re-acquire the critical
section and re-evaluate which cache we are accessing in case migration has
occured, or circumstances have changed in the current cache.
Per-CPU cache statistics are now gathered lock-free by the sysctl, which
can result in small races in statistics reporting for caches.
Reviewed by: bmilekic, jeff (somewhat)
Tested by: rwatson, kris, gnn, scottl, mike at sentex dot net, others
2005-04-29 18:56:36 +00:00
|
|
|
/*
|
|
|
|
* Attempt to retrieve the item from the per-CPU cache has failed, so
|
|
|
|
* we must go back to the zone. This requires the zone lock, so we
|
|
|
|
* must drop the critical section, then re-acquire it when we go back
|
|
|
|
* to the cache. Since the critical section is released, we may be
|
|
|
|
* preempted or migrate. As such, make sure not to maintain any
|
|
|
|
* thread-local state specific to the cache from prior to releasing
|
|
|
|
* the critical section.
|
|
|
|
*/
|
|
|
|
critical_exit();
|
2002-04-08 02:42:55 +00:00
|
|
|
ZONE_LOCK(zone);
|
Modify UMA to use critical sections to protect per-CPU caches, rather than
mutexes, which offers lower overhead on both UP and SMP. When allocating
from or freeing to the per-cpu cache, without INVARIANTS enabled, we now
no longer perform any mutex operations, which offers a 1%-3% performance
improvement in a variety of micro-benchmarks. We rely on critical
sections to prevent (a) preemption resulting in reentrant access to UMA on
a single CPU, and (b) migration of the thread during access. In the event
we need to go back to the zone for a new bucket, we release the critical
section to acquire the global zone mutex, and must re-acquire the critical
section and re-evaluate which cache we are accessing in case migration has
occured, or circumstances have changed in the current cache.
Per-CPU cache statistics are now gathered lock-free by the sysctl, which
can result in small races in statistics reporting for caches.
Reviewed by: bmilekic, jeff (somewhat)
Tested by: rwatson, kris, gnn, scottl, mike at sentex dot net, others
2005-04-29 18:56:36 +00:00
|
|
|
critical_enter();
|
|
|
|
cpu = curcpu;
|
|
|
|
cache = &zone->uz_cpu[cpu];
|
|
|
|
bucket = cache->uc_allocbucket;
|
|
|
|
if (bucket != NULL) {
|
|
|
|
if (bucket->ub_cnt > 0) {
|
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
goto zalloc_start;
|
|
|
|
}
|
|
|
|
bucket = cache->uc_freebucket;
|
|
|
|
if (bucket != NULL && bucket->ub_cnt > 0) {
|
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
goto zalloc_start;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2002-04-08 02:42:55 +00:00
|
|
|
/* Since we have locked the zone we may as well send back our stats */
|
|
|
|
zone->uz_allocs += cache->uc_allocs;
|
|
|
|
cache->uc_allocs = 0;
|
2005-07-14 16:17:21 +00:00
|
|
|
zone->uz_frees += cache->uc_frees;
|
|
|
|
cache->uc_frees = 0;
|
2002-04-08 02:42:55 +00:00
|
|
|
|
|
|
|
/* Our old one is now a free bucket */
|
|
|
|
if (cache->uc_allocbucket) {
|
2003-09-19 06:26:45 +00:00
|
|
|
KASSERT(cache->uc_allocbucket->ub_cnt == 0,
|
2002-04-08 02:42:55 +00:00
|
|
|
("uma_zalloc_arg: Freeing a non free bucket."));
|
|
|
|
LIST_INSERT_HEAD(&zone->uz_free_bucket,
|
|
|
|
cache->uc_allocbucket, ub_link);
|
|
|
|
cache->uc_allocbucket = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check the free list for a new alloc bucket */
|
|
|
|
if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
|
2003-09-19 06:26:45 +00:00
|
|
|
KASSERT(bucket->ub_cnt != 0,
|
2002-04-08 02:42:55 +00:00
|
|
|
("uma_zalloc_arg: Returning an empty bucket."));
|
|
|
|
|
|
|
|
LIST_REMOVE(bucket, ub_link);
|
|
|
|
cache->uc_allocbucket = bucket;
|
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
goto zalloc_start;
|
2004-01-30 16:26:29 +00:00
|
|
|
}
|
Modify UMA to use critical sections to protect per-CPU caches, rather than
mutexes, which offers lower overhead on both UP and SMP. When allocating
from or freeing to the per-cpu cache, without INVARIANTS enabled, we now
no longer perform any mutex operations, which offers a 1%-3% performance
improvement in a variety of micro-benchmarks. We rely on critical
sections to prevent (a) preemption resulting in reentrant access to UMA on
a single CPU, and (b) migration of the thread during access. In the event
we need to go back to the zone for a new bucket, we release the critical
section to acquire the global zone mutex, and must re-acquire the critical
section and re-evaluate which cache we are accessing in case migration has
occured, or circumstances have changed in the current cache.
Per-CPU cache statistics are now gathered lock-free by the sysctl, which
can result in small races in statistics reporting for caches.
Reviewed by: bmilekic, jeff (somewhat)
Tested by: rwatson, kris, gnn, scottl, mike at sentex dot net, others
2005-04-29 18:56:36 +00:00
|
|
|
/* We are no longer associated with this CPU. */
|
|
|
|
critical_exit();
|
2002-10-24 07:59:03 +00:00
|
|
|
|
2002-04-08 02:42:55 +00:00
|
|
|
/* Bump up our uz_count so we get here less */
|
2003-09-19 06:26:45 +00:00
|
|
|
if (zone->uz_count < BUCKET_MAX)
|
2002-04-08 02:42:55 +00:00
|
|
|
zone->uz_count++;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
/*
|
2002-04-08 02:42:55 +00:00
|
|
|
* Now lets just fill a bucket and put it on the free list. If that
|
|
|
|
* works we'll restart the allocation from the begining.
|
2002-03-19 09:11:49 +00:00
|
|
|
*/
|
2009-01-25 09:11:24 +00:00
|
|
|
if (zone_alloc_bucket(zone, flags)) {
|
2002-10-24 07:59:03 +00:00
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
goto zalloc_restart;
|
|
|
|
}
|
2002-04-08 02:42:55 +00:00
|
|
|
ZONE_UNLOCK(zone);
|
2002-10-24 07:59:03 +00:00
|
|
|
/*
|
|
|
|
* We may not be able to get a bucket so return an actual item.
|
|
|
|
*/
|
|
|
|
#ifdef UMA_DEBUG
|
|
|
|
printf("uma_zalloc_arg: Bucketzone returned NULL\n");
|
|
|
|
#endif
|
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
item = zone_alloc_item(zone, udata, flags);
|
|
|
|
return (item);
|
2002-10-24 07:59:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static uma_slab_t
|
2009-01-25 09:11:24 +00:00
|
|
|
keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
|
2002-10-24 07:59:03 +00:00
|
|
|
{
|
|
|
|
uma_slab_t slab;
|
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
mtx_assert(&keg->uk_lock, MA_OWNED);
|
2002-10-24 07:59:03 +00:00
|
|
|
slab = NULL;
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
/*
|
|
|
|
* Find a slab with some space. Prefer slabs that are partially
|
|
|
|
* used over those that are totally full. This helps to reduce
|
|
|
|
* fragmentation.
|
|
|
|
*/
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (keg->uk_free != 0) {
|
|
|
|
if (!LIST_EMPTY(&keg->uk_part_slab)) {
|
|
|
|
slab = LIST_FIRST(&keg->uk_part_slab);
|
2002-10-24 07:59:03 +00:00
|
|
|
} else {
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
slab = LIST_FIRST(&keg->uk_free_slab);
|
2002-10-24 07:59:03 +00:00
|
|
|
LIST_REMOVE(slab, us_link);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
|
|
|
|
us_link);
|
2002-10-24 07:59:03 +00:00
|
|
|
}
|
2009-01-25 09:11:24 +00:00
|
|
|
MPASS(slab->us_keg == keg);
|
2002-10-24 07:59:03 +00:00
|
|
|
return (slab);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* M_NOVM means don't ask at all!
|
|
|
|
*/
|
|
|
|
if (flags & M_NOVM)
|
|
|
|
break;
|
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
keg->uk_flags |= UMA_ZFLAG_FULL;
|
2009-01-25 09:11:24 +00:00
|
|
|
/*
|
|
|
|
* If this is not a multi-zone, set the FULL bit.
|
|
|
|
* Otherwise slab_multi() takes care of it.
|
|
|
|
*/
|
|
|
|
if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0)
|
|
|
|
zone->uz_flags |= UMA_ZFLAG_FULL;
|
2003-01-20 01:32:56 +00:00
|
|
|
if (flags & M_NOWAIT)
|
2002-10-24 07:59:03 +00:00
|
|
|
break;
|
2009-01-25 09:11:24 +00:00
|
|
|
msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
|
2002-10-24 07:59:03 +00:00
|
|
|
continue;
|
|
|
|
}
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
keg->uk_recurse++;
|
2009-01-25 09:11:24 +00:00
|
|
|
slab = keg_alloc_slab(keg, zone, flags);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
keg->uk_recurse--;
|
2004-01-30 16:26:29 +00:00
|
|
|
/*
|
2002-10-24 07:59:03 +00:00
|
|
|
* If we got a slab here it's safe to mark it partially used
|
|
|
|
* and return. We assume that the caller is going to remove
|
|
|
|
* at least one item.
|
|
|
|
*/
|
|
|
|
if (slab) {
|
2009-01-25 09:11:24 +00:00
|
|
|
MPASS(slab->us_keg == keg);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
|
2002-10-24 07:59:03 +00:00
|
|
|
return (slab);
|
|
|
|
}
|
2004-01-30 16:26:29 +00:00
|
|
|
/*
|
2002-10-24 07:59:03 +00:00
|
|
|
* We might not have been able to get a slab but another cpu
|
|
|
|
* could have while we were unlocked. Check again before we
|
|
|
|
* fail.
|
|
|
|
*/
|
2009-01-25 09:11:24 +00:00
|
|
|
flags |= M_NOVM;
|
2002-10-24 07:59:03 +00:00
|
|
|
}
|
|
|
|
return (slab);
|
|
|
|
}
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
static inline void
|
|
|
|
zone_relock(uma_zone_t zone, uma_keg_t keg)
|
|
|
|
{
|
|
|
|
if (zone->uz_lock != &keg->uk_lock) {
|
|
|
|
KEG_UNLOCK(keg);
|
|
|
|
ZONE_LOCK(zone);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
keg_relock(uma_keg_t keg, uma_zone_t zone)
|
|
|
|
{
|
|
|
|
if (zone->uz_lock != &keg->uk_lock) {
|
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
KEG_LOCK(keg);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static uma_slab_t
|
|
|
|
zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
|
|
|
|
{
|
|
|
|
uma_slab_t slab;
|
|
|
|
|
|
|
|
if (keg == NULL)
|
|
|
|
keg = zone_first_keg(zone);
|
|
|
|
/*
|
|
|
|
* This is to prevent us from recursively trying to allocate
|
|
|
|
* buckets. The problem is that if an allocation forces us to
|
|
|
|
* grab a new bucket we will call page_alloc, which will go off
|
|
|
|
* and cause the vm to allocate vm_map_entries. If we need new
|
|
|
|
* buckets there too we will recurse in kmem_alloc and bad
|
|
|
|
* things happen. So instead we return a NULL bucket, and make
|
|
|
|
* the code that allocates buckets smart enough to deal with it
|
|
|
|
*/
|
|
|
|
if (keg->uk_flags & UMA_ZFLAG_BUCKET && keg->uk_recurse != 0)
|
|
|
|
return (NULL);
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
slab = keg_fetch_slab(keg, zone, flags);
|
|
|
|
if (slab)
|
|
|
|
return (slab);
|
|
|
|
if (flags & (M_NOWAIT | M_NOVM))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* uma_zone_fetch_slab_multi: Fetches a slab from one available keg. Returns
|
|
|
|
* with the keg locked. Caller must call zone_relock() afterwards if the
|
|
|
|
* zone lock is required. On NULL the zone lock is held.
|
|
|
|
*
|
|
|
|
* The last pointer is used to seed the search. It is not required.
|
|
|
|
*/
|
|
|
|
static uma_slab_t
|
|
|
|
zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
|
|
|
|
{
|
|
|
|
uma_klink_t klink;
|
|
|
|
uma_slab_t slab;
|
|
|
|
uma_keg_t keg;
|
|
|
|
int flags;
|
|
|
|
int empty;
|
|
|
|
int full;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Don't wait on the first pass. This will skip limit tests
|
|
|
|
* as well. We don't want to block if we can find a provider
|
|
|
|
* without blocking.
|
|
|
|
*/
|
|
|
|
flags = (rflags & ~M_WAITOK) | M_NOWAIT;
|
|
|
|
/*
|
|
|
|
* Use the last slab allocated as a hint for where to start
|
|
|
|
* the search.
|
|
|
|
*/
|
|
|
|
if (last) {
|
|
|
|
slab = keg_fetch_slab(last, zone, flags);
|
|
|
|
if (slab)
|
|
|
|
return (slab);
|
|
|
|
zone_relock(zone, last);
|
|
|
|
last = NULL;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Loop until we have a slab incase of transient failures
|
|
|
|
* while M_WAITOK is specified. I'm not sure this is 100%
|
|
|
|
* required but we've done it for so long now.
|
|
|
|
*/
|
|
|
|
for (;;) {
|
|
|
|
empty = 0;
|
|
|
|
full = 0;
|
|
|
|
/*
|
|
|
|
* Search the available kegs for slabs. Be careful to hold the
|
|
|
|
* correct lock while calling into the keg layer.
|
|
|
|
*/
|
|
|
|
LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
|
|
|
|
keg = klink->kl_keg;
|
|
|
|
keg_relock(keg, zone);
|
|
|
|
if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
|
|
|
|
slab = keg_fetch_slab(keg, zone, flags);
|
|
|
|
if (slab)
|
|
|
|
return (slab);
|
|
|
|
}
|
|
|
|
if (keg->uk_flags & UMA_ZFLAG_FULL)
|
|
|
|
full++;
|
|
|
|
else
|
|
|
|
empty++;
|
|
|
|
zone_relock(zone, keg);
|
|
|
|
}
|
|
|
|
if (rflags & (M_NOWAIT | M_NOVM))
|
|
|
|
break;
|
|
|
|
flags = rflags;
|
|
|
|
/*
|
|
|
|
* All kegs are full. XXX We can't atomically check all kegs
|
|
|
|
* and sleep so just sleep for a short period and retry.
|
|
|
|
*/
|
|
|
|
if (full && !empty) {
|
|
|
|
zone->uz_flags |= UMA_ZFLAG_FULL;
|
2010-06-15 19:28:37 +00:00
|
|
|
zone->uz_sleeps++;
|
2009-01-25 09:11:24 +00:00
|
|
|
msleep(zone, zone->uz_lock, PVM, "zonelimit", hz/100);
|
|
|
|
zone->uz_flags &= ~UMA_ZFLAG_FULL;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
2003-07-30 18:55:15 +00:00
|
|
|
static void *
|
2009-01-25 09:11:24 +00:00
|
|
|
slab_alloc_item(uma_zone_t zone, uma_slab_t slab)
|
2002-10-24 07:59:03 +00:00
|
|
|
{
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
uma_keg_t keg;
|
Fix critical stability problems that can cause UMA mbuf cluster
state management corruption, mbuf leaks, general mbuf corruption,
and at least on i386 a first level splash damage radius that
encompasses up to about half a megabyte of the memory after
an mbuf cluster's allocation slab. In short, this has caused
instability nightmares anywhere the right kind of network traffic
is present.
When the polymorphic refcount slabs were added to UMA, the new types
were not used pervasively. In particular, the slab management
structure was turned into one for refcounts, and one for non-refcounts
(supposed to be mostly like the old slab management structure),
but the latter was almost always used through out. In general, every
access to zones with UMA_ZONE_REFCNT turned on corrupted the
"next free" slab offset offset and the refcount with each other and
with other allocations (on i386, 2 mbuf clusters per 4096 byte slab).
Fix things so that the right type is used to access refcounted zones
where it was not before. There are additional errors in gross
overestimation of padding, it seems, that would cause a large kegs
(nee zones) to be allocated when small ones would do. Unless I have
analyzed this incorrectly, it is not directly harmful.
2004-10-08 20:19:29 +00:00
|
|
|
uma_slabrefcnt_t slabref;
|
2002-10-24 07:59:03 +00:00
|
|
|
void *item;
|
|
|
|
u_int8_t freei;
|
2004-01-30 16:26:29 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
keg = slab->us_keg;
|
|
|
|
mtx_assert(&keg->uk_lock, MA_OWNED);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
|
2002-10-24 07:59:03 +00:00
|
|
|
freei = slab->us_firstfree;
|
Fix critical stability problems that can cause UMA mbuf cluster
state management corruption, mbuf leaks, general mbuf corruption,
and at least on i386 a first level splash damage radius that
encompasses up to about half a megabyte of the memory after
an mbuf cluster's allocation slab. In short, this has caused
instability nightmares anywhere the right kind of network traffic
is present.
When the polymorphic refcount slabs were added to UMA, the new types
were not used pervasively. In particular, the slab management
structure was turned into one for refcounts, and one for non-refcounts
(supposed to be mostly like the old slab management structure),
but the latter was almost always used through out. In general, every
access to zones with UMA_ZONE_REFCNT turned on corrupted the
"next free" slab offset offset and the refcount with each other and
with other allocations (on i386, 2 mbuf clusters per 4096 byte slab).
Fix things so that the right type is used to access refcounted zones
where it was not before. There are additional errors in gross
overestimation of padding, it seems, that would cause a large kegs
(nee zones) to be allocated when small ones would do. Unless I have
analyzed this incorrectly, it is not directly harmful.
2004-10-08 20:19:29 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_REFCNT) {
|
|
|
|
slabref = (uma_slabrefcnt_t)slab;
|
|
|
|
slab->us_firstfree = slabref->us_freelist[freei].us_item;
|
|
|
|
} else {
|
|
|
|
slab->us_firstfree = slab->us_freelist[freei].us_item;
|
|
|
|
}
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
item = slab->us_data + (keg->uk_rsize * freei);
|
2002-10-24 07:59:03 +00:00
|
|
|
|
|
|
|
slab->us_freecount--;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
keg->uk_free--;
|
2002-10-24 07:59:03 +00:00
|
|
|
#ifdef INVARIANTS
|
|
|
|
uma_dbg_alloc(zone, slab, item);
|
|
|
|
#endif
|
|
|
|
/* Move this slab to the full list */
|
|
|
|
if (slab->us_freecount == 0) {
|
|
|
|
LIST_REMOVE(slab, us_link);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
|
2002-10-24 07:59:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return (item);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2009-01-25 09:11:24 +00:00
|
|
|
zone_alloc_bucket(uma_zone_t zone, int flags)
|
2002-10-24 07:59:03 +00:00
|
|
|
{
|
|
|
|
uma_bucket_t bucket;
|
|
|
|
uma_slab_t slab;
|
2009-01-25 09:11:24 +00:00
|
|
|
uma_keg_t keg;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
int16_t saved;
|
2004-08-02 00:18:36 +00:00
|
|
|
int max, origflags = flags;
|
2002-10-24 07:59:03 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Try this zone's free list first so we don't allocate extra buckets.
|
|
|
|
*/
|
|
|
|
if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
|
2003-09-19 06:26:45 +00:00
|
|
|
KASSERT(bucket->ub_cnt == 0,
|
2009-01-25 09:11:24 +00:00
|
|
|
("zone_alloc_bucket: Bucket on free list is not empty."));
|
2002-10-24 07:59:03 +00:00
|
|
|
LIST_REMOVE(bucket, ub_link);
|
|
|
|
} else {
|
2002-06-17 22:02:41 +00:00
|
|
|
int bflags;
|
|
|
|
|
2003-09-19 06:26:45 +00:00
|
|
|
bflags = (flags & ~M_ZERO);
|
2009-01-25 09:11:24 +00:00
|
|
|
if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
|
2002-06-17 22:02:41 +00:00
|
|
|
bflags |= M_NOVM;
|
|
|
|
|
2002-10-24 07:59:03 +00:00
|
|
|
ZONE_UNLOCK(zone);
|
2003-09-19 06:26:45 +00:00
|
|
|
bucket = bucket_alloc(zone->uz_count, bflags);
|
2002-10-24 07:59:03 +00:00
|
|
|
ZONE_LOCK(zone);
|
|
|
|
}
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
if (bucket == NULL) {
|
2002-10-24 07:59:03 +00:00
|
|
|
return (0);
|
2009-01-25 09:11:24 +00:00
|
|
|
}
|
2002-10-24 07:59:03 +00:00
|
|
|
|
|
|
|
#ifdef SMP
|
2002-03-19 09:11:49 +00:00
|
|
|
/*
|
2002-10-24 07:59:03 +00:00
|
|
|
* This code is here to limit the number of simultaneous bucket fills
|
|
|
|
* for any given zone to the number of per cpu caches in this zone. This
|
|
|
|
* is done so that we don't allocate more memory than we really need.
|
2002-03-19 09:11:49 +00:00
|
|
|
*/
|
2002-10-24 07:59:03 +00:00
|
|
|
if (zone->uz_fills >= mp_ncpus)
|
|
|
|
goto done;
|
|
|
|
|
2002-04-08 02:42:55 +00:00
|
|
|
#endif
|
2002-10-24 07:59:03 +00:00
|
|
|
zone->uz_fills++;
|
|
|
|
|
2003-09-19 09:22:04 +00:00
|
|
|
max = MIN(bucket->ub_entries, zone->uz_count);
|
2002-10-24 07:59:03 +00:00
|
|
|
/* Try to keep the buckets totally full */
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
saved = bucket->ub_cnt;
|
2009-01-25 09:11:24 +00:00
|
|
|
slab = NULL;
|
|
|
|
keg = NULL;
|
2003-09-19 09:22:04 +00:00
|
|
|
while (bucket->ub_cnt < max &&
|
2009-01-25 09:11:24 +00:00
|
|
|
(slab = zone->uz_slab(zone, keg, flags)) != NULL) {
|
|
|
|
keg = slab->us_keg;
|
2003-09-19 09:22:04 +00:00
|
|
|
while (slab->us_freecount && bucket->ub_cnt < max) {
|
2003-09-19 06:26:45 +00:00
|
|
|
bucket->ub_bucket[bucket->ub_cnt++] =
|
2009-01-25 09:11:24 +00:00
|
|
|
slab_alloc_item(zone, slab);
|
2002-10-24 07:59:03 +00:00
|
|
|
}
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
|
2002-10-24 07:59:03 +00:00
|
|
|
/* Don't block on the next fill */
|
|
|
|
flags |= M_NOWAIT;
|
|
|
|
}
|
2009-01-25 09:11:24 +00:00
|
|
|
if (slab)
|
|
|
|
zone_relock(zone, keg);
|
2002-04-08 02:42:55 +00:00
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
/*
|
|
|
|
* We unlock here because we need to call the zone's init.
|
|
|
|
* It should be safe to unlock because the slab dealt with
|
|
|
|
* above is already on the appropriate list within the keg
|
|
|
|
* and the bucket we filled is not yet on any list, so we
|
|
|
|
* own it.
|
|
|
|
*/
|
|
|
|
if (zone->uz_init != NULL) {
|
|
|
|
int i;
|
|
|
|
|
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
for (i = saved; i < bucket->ub_cnt; i++)
|
2009-01-25 09:11:24 +00:00
|
|
|
if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
|
|
|
|
origflags) != 0)
|
2004-08-02 00:18:36 +00:00
|
|
|
break;
|
|
|
|
/*
|
|
|
|
* If we couldn't initialize the whole bucket, put the
|
|
|
|
* rest back onto the freelist.
|
|
|
|
*/
|
|
|
|
if (i != bucket->ub_cnt) {
|
|
|
|
int j;
|
|
|
|
|
2004-10-27 21:19:35 +00:00
|
|
|
for (j = i; j < bucket->ub_cnt; j++) {
|
2009-01-25 09:11:24 +00:00
|
|
|
zone_free_item(zone, bucket->ub_bucket[j],
|
2005-07-15 23:34:39 +00:00
|
|
|
NULL, SKIP_FINI, 0);
|
2004-10-27 21:19:35 +00:00
|
|
|
#ifdef INVARIANTS
|
|
|
|
bucket->ub_bucket[j] = NULL;
|
|
|
|
#endif
|
|
|
|
}
|
2004-08-02 00:18:36 +00:00
|
|
|
bucket->ub_cnt = i;
|
|
|
|
}
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
ZONE_LOCK(zone);
|
|
|
|
}
|
2002-03-19 09:11:49 +00:00
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
zone->uz_fills--;
|
2003-09-19 06:26:45 +00:00
|
|
|
if (bucket->ub_cnt != 0) {
|
2002-10-24 07:59:03 +00:00
|
|
|
LIST_INSERT_HEAD(&zone->uz_full_bucket,
|
|
|
|
bucket, ub_link);
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
#ifdef SMP
|
|
|
|
done:
|
|
|
|
#endif
|
2003-09-19 06:26:45 +00:00
|
|
|
bucket_free(bucket);
|
2002-10-24 07:59:03 +00:00
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
2002-03-19 09:11:49 +00:00
|
|
|
/*
|
2002-10-24 07:59:03 +00:00
|
|
|
* Allocates an item for an internal zone
|
2002-03-19 09:11:49 +00:00
|
|
|
*
|
|
|
|
* Arguments
|
|
|
|
* zone The zone to alloc for.
|
|
|
|
* udata The data to be passed to the constructor.
|
2003-02-19 05:47:46 +00:00
|
|
|
* flags M_WAITOK, M_NOWAIT, M_ZERO.
|
2002-03-19 09:11:49 +00:00
|
|
|
*
|
|
|
|
* Returns
|
|
|
|
* NULL if there is no memory and M_NOWAIT is set
|
2002-10-24 07:59:03 +00:00
|
|
|
* An item if successful
|
2002-03-19 09:11:49 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
static void *
|
2009-01-25 09:11:24 +00:00
|
|
|
zone_alloc_item(uma_zone_t zone, void *udata, int flags)
|
2002-03-19 09:11:49 +00:00
|
|
|
{
|
|
|
|
uma_slab_t slab;
|
|
|
|
void *item;
|
|
|
|
|
|
|
|
item = NULL;
|
|
|
|
|
|
|
|
#ifdef UMA_DEBUG_ALLOC
|
|
|
|
printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
|
|
|
|
#endif
|
|
|
|
ZONE_LOCK(zone);
|
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
slab = zone->uz_slab(zone, NULL, flags);
|
2002-10-24 07:59:03 +00:00
|
|
|
if (slab == NULL) {
|
2005-07-15 23:34:39 +00:00
|
|
|
zone->uz_fails++;
|
2002-10-24 07:59:03 +00:00
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
return (NULL);
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
item = slab_alloc_item(zone, slab);
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
zone_relock(zone, slab->us_keg);
|
2005-07-14 16:13:46 +00:00
|
|
|
zone->uz_allocs++;
|
2002-03-19 09:11:49 +00:00
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
/*
|
|
|
|
* We have to call both the zone's init (not the keg's init)
|
|
|
|
* and the zone's ctor. This is because the item is going from
|
|
|
|
* a keg slab directly to the user, and the user is expecting it
|
|
|
|
* to be both zone-init'd as well as zone-ctor'd.
|
|
|
|
*/
|
2004-08-02 00:18:36 +00:00
|
|
|
if (zone->uz_init != NULL) {
|
2009-01-25 09:11:24 +00:00
|
|
|
if (zone->uz_init(item, zone->uz_size, flags) != 0) {
|
|
|
|
zone_free_item(zone, item, udata, SKIP_FINI,
|
2005-07-20 18:47:42 +00:00
|
|
|
ZFREE_STATFAIL | ZFREE_STATFREE);
|
2004-08-02 00:18:36 +00:00
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (zone->uz_ctor != NULL) {
|
2009-01-25 09:11:24 +00:00
|
|
|
if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
|
|
|
|
zone_free_item(zone, item, udata, SKIP_DTOR,
|
2005-07-20 18:47:42 +00:00
|
|
|
ZFREE_STATFAIL | ZFREE_STATFREE);
|
2004-08-02 00:18:36 +00:00
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
}
|
2002-10-24 07:59:03 +00:00
|
|
|
if (flags & M_ZERO)
|
2009-01-25 09:11:24 +00:00
|
|
|
bzero(item, zone->uz_size);
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
return (item);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* See uma.h */
|
|
|
|
void
|
|
|
|
uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
|
|
|
|
{
|
|
|
|
uma_cache_t cache;
|
|
|
|
uma_bucket_t bucket;
|
2002-06-17 23:53:58 +00:00
|
|
|
int bflags;
|
2002-03-19 09:11:49 +00:00
|
|
|
int cpu;
|
|
|
|
|
|
|
|
#ifdef UMA_DEBUG_ALLOC_1
|
|
|
|
printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
|
|
|
|
#endif
|
2004-08-06 21:52:38 +00:00
|
|
|
CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
|
|
|
|
zone->uz_name);
|
|
|
|
|
2010-10-19 16:06:00 +00:00
|
|
|
/* uma_zfree(..., NULL) does nothing, to match free(9). */
|
|
|
|
if (item == NULL)
|
|
|
|
return;
|
2011-10-12 18:08:28 +00:00
|
|
|
#ifdef DEBUG_MEMGUARD
|
|
|
|
if (is_memguard_addr(item)) {
|
|
|
|
if (zone->uz_dtor != NULL && zone->uz_dtor != mtrash_dtor)
|
|
|
|
zone->uz_dtor(item, zone->uz_size, udata);
|
|
|
|
if (zone->uz_fini != NULL && zone->uz_fini != mtrash_fini)
|
|
|
|
zone->uz_fini(item, zone->uz_size);
|
|
|
|
memguard_free(item);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
#endif
|
Modify UMA to use critical sections to protect per-CPU caches, rather than
mutexes, which offers lower overhead on both UP and SMP. When allocating
from or freeing to the per-cpu cache, without INVARIANTS enabled, we now
no longer perform any mutex operations, which offers a 1%-3% performance
improvement in a variety of micro-benchmarks. We rely on critical
sections to prevent (a) preemption resulting in reentrant access to UMA on
a single CPU, and (b) migration of the thread during access. In the event
we need to go back to the zone for a new bucket, we release the critical
section to acquire the global zone mutex, and must re-acquire the critical
section and re-evaluate which cache we are accessing in case migration has
occured, or circumstances have changed in the current cache.
Per-CPU cache statistics are now gathered lock-free by the sysctl, which
can result in small races in statistics reporting for caches.
Reviewed by: bmilekic, jeff (somewhat)
Tested by: rwatson, kris, gnn, scottl, mike at sentex dot net, others
2005-04-29 18:56:36 +00:00
|
|
|
if (zone->uz_dtor)
|
2009-01-25 09:11:24 +00:00
|
|
|
zone->uz_dtor(item, zone->uz_size, udata);
|
|
|
|
|
Modify UMA to use critical sections to protect per-CPU caches, rather than
mutexes, which offers lower overhead on both UP and SMP. When allocating
from or freeing to the per-cpu cache, without INVARIANTS enabled, we now
no longer perform any mutex operations, which offers a 1%-3% performance
improvement in a variety of micro-benchmarks. We rely on critical
sections to prevent (a) preemption resulting in reentrant access to UMA on
a single CPU, and (b) migration of the thread during access. In the event
we need to go back to the zone for a new bucket, we release the critical
section to acquire the global zone mutex, and must re-acquire the critical
section and re-evaluate which cache we are accessing in case migration has
occured, or circumstances have changed in the current cache.
Per-CPU cache statistics are now gathered lock-free by the sysctl, which
can result in small races in statistics reporting for caches.
Reviewed by: bmilekic, jeff (somewhat)
Tested by: rwatson, kris, gnn, scottl, mike at sentex dot net, others
2005-04-29 18:56:36 +00:00
|
|
|
#ifdef INVARIANTS
|
|
|
|
ZONE_LOCK(zone);
|
2009-01-25 09:11:24 +00:00
|
|
|
if (zone->uz_flags & UMA_ZONE_MALLOC)
|
Modify UMA to use critical sections to protect per-CPU caches, rather than
mutexes, which offers lower overhead on both UP and SMP. When allocating
from or freeing to the per-cpu cache, without INVARIANTS enabled, we now
no longer perform any mutex operations, which offers a 1%-3% performance
improvement in a variety of micro-benchmarks. We rely on critical
sections to prevent (a) preemption resulting in reentrant access to UMA on
a single CPU, and (b) migration of the thread during access. In the event
we need to go back to the zone for a new bucket, we release the critical
section to acquire the global zone mutex, and must re-acquire the critical
section and re-evaluate which cache we are accessing in case migration has
occured, or circumstances have changed in the current cache.
Per-CPU cache statistics are now gathered lock-free by the sysctl, which
can result in small races in statistics reporting for caches.
Reviewed by: bmilekic, jeff (somewhat)
Tested by: rwatson, kris, gnn, scottl, mike at sentex dot net, others
2005-04-29 18:56:36 +00:00
|
|
|
uma_dbg_free(zone, udata, item);
|
|
|
|
else
|
|
|
|
uma_dbg_free(zone, NULL, item);
|
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
#endif
|
2002-04-14 01:56:25 +00:00
|
|
|
/*
|
|
|
|
* The race here is acceptable. If we miss it we'll just have to wait
|
|
|
|
* a little longer for the limits to be reset.
|
|
|
|
*/
|
2009-01-25 09:11:24 +00:00
|
|
|
if (zone->uz_flags & UMA_ZFLAG_FULL)
|
2002-04-14 01:56:25 +00:00
|
|
|
goto zfree_internal;
|
|
|
|
|
Modify UMA to use critical sections to protect per-CPU caches, rather than
mutexes, which offers lower overhead on both UP and SMP. When allocating
from or freeing to the per-cpu cache, without INVARIANTS enabled, we now
no longer perform any mutex operations, which offers a 1%-3% performance
improvement in a variety of micro-benchmarks. We rely on critical
sections to prevent (a) preemption resulting in reentrant access to UMA on
a single CPU, and (b) migration of the thread during access. In the event
we need to go back to the zone for a new bucket, we release the critical
section to acquire the global zone mutex, and must re-acquire the critical
section and re-evaluate which cache we are accessing in case migration has
occured, or circumstances have changed in the current cache.
Per-CPU cache statistics are now gathered lock-free by the sysctl, which
can result in small races in statistics reporting for caches.
Reviewed by: bmilekic, jeff (somewhat)
Tested by: rwatson, kris, gnn, scottl, mike at sentex dot net, others
2005-04-29 18:56:36 +00:00
|
|
|
/*
|
|
|
|
* If possible, free to the per-CPU cache. There are two
|
|
|
|
* requirements for safe access to the per-CPU cache: (1) the thread
|
|
|
|
* accessing the cache must not be preempted or yield during access,
|
|
|
|
* and (2) the thread must not migrate CPUs without switching which
|
|
|
|
* cache it accesses. We rely on a critical section to prevent
|
|
|
|
* preemption and migration. We release the critical section in
|
|
|
|
* order to acquire the zone mutex if we are unable to free to the
|
|
|
|
* current cache; when we re-acquire the critical section, we must
|
|
|
|
* detect and handle migration if it has occurred.
|
|
|
|
*/
|
2002-04-08 02:42:55 +00:00
|
|
|
zfree_restart:
|
Modify UMA to use critical sections to protect per-CPU caches, rather than
mutexes, which offers lower overhead on both UP and SMP. When allocating
from or freeing to the per-cpu cache, without INVARIANTS enabled, we now
no longer perform any mutex operations, which offers a 1%-3% performance
improvement in a variety of micro-benchmarks. We rely on critical
sections to prevent (a) preemption resulting in reentrant access to UMA on
a single CPU, and (b) migration of the thread during access. In the event
we need to go back to the zone for a new bucket, we release the critical
section to acquire the global zone mutex, and must re-acquire the critical
section and re-evaluate which cache we are accessing in case migration has
occured, or circumstances have changed in the current cache.
Per-CPU cache statistics are now gathered lock-free by the sysctl, which
can result in small races in statistics reporting for caches.
Reviewed by: bmilekic, jeff (somewhat)
Tested by: rwatson, kris, gnn, scottl, mike at sentex dot net, others
2005-04-29 18:56:36 +00:00
|
|
|
critical_enter();
|
|
|
|
cpu = curcpu;
|
2002-03-19 09:11:49 +00:00
|
|
|
cache = &zone->uz_cpu[cpu];
|
|
|
|
|
|
|
|
zfree_start:
|
|
|
|
bucket = cache->uc_freebucket;
|
|
|
|
|
|
|
|
if (bucket) {
|
2002-04-08 02:42:55 +00:00
|
|
|
/*
|
|
|
|
* Do we have room in our bucket? It is OK for this uz count
|
|
|
|
* check to be slightly out of sync.
|
|
|
|
*/
|
|
|
|
|
2003-09-19 06:26:45 +00:00
|
|
|
if (bucket->ub_cnt < bucket->ub_entries) {
|
|
|
|
KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
|
2002-03-19 09:11:49 +00:00
|
|
|
("uma_zfree: Freeing to non free bucket index."));
|
2003-09-19 06:26:45 +00:00
|
|
|
bucket->ub_bucket[bucket->ub_cnt] = item;
|
|
|
|
bucket->ub_cnt++;
|
2005-07-14 16:17:21 +00:00
|
|
|
cache->uc_frees++;
|
Modify UMA to use critical sections to protect per-CPU caches, rather than
mutexes, which offers lower overhead on both UP and SMP. When allocating
from or freeing to the per-cpu cache, without INVARIANTS enabled, we now
no longer perform any mutex operations, which offers a 1%-3% performance
improvement in a variety of micro-benchmarks. We rely on critical
sections to prevent (a) preemption resulting in reentrant access to UMA on
a single CPU, and (b) migration of the thread during access. In the event
we need to go back to the zone for a new bucket, we release the critical
section to acquire the global zone mutex, and must re-acquire the critical
section and re-evaluate which cache we are accessing in case migration has
occured, or circumstances have changed in the current cache.
Per-CPU cache statistics are now gathered lock-free by the sysctl, which
can result in small races in statistics reporting for caches.
Reviewed by: bmilekic, jeff (somewhat)
Tested by: rwatson, kris, gnn, scottl, mike at sentex dot net, others
2005-04-29 18:56:36 +00:00
|
|
|
critical_exit();
|
2002-03-19 09:11:49 +00:00
|
|
|
return;
|
|
|
|
} else if (cache->uc_allocbucket) {
|
|
|
|
#ifdef UMA_DEBUG_ALLOC
|
|
|
|
printf("uma_zfree: Swapping buckets.\n");
|
|
|
|
#endif
|
|
|
|
/*
|
|
|
|
* We have run out of space in our freebucket.
|
|
|
|
* See if we can switch with our alloc bucket.
|
|
|
|
*/
|
2004-01-30 16:26:29 +00:00
|
|
|
if (cache->uc_allocbucket->ub_cnt <
|
2003-09-19 06:26:45 +00:00
|
|
|
cache->uc_freebucket->ub_cnt) {
|
2003-09-19 06:41:06 +00:00
|
|
|
bucket = cache->uc_freebucket;
|
2002-03-19 09:11:49 +00:00
|
|
|
cache->uc_freebucket = cache->uc_allocbucket;
|
2003-09-19 06:41:06 +00:00
|
|
|
cache->uc_allocbucket = bucket;
|
2002-03-19 09:11:49 +00:00
|
|
|
goto zfree_start;
|
|
|
|
}
|
|
|
|
}
|
2004-01-30 16:26:29 +00:00
|
|
|
}
|
2002-03-19 09:11:49 +00:00
|
|
|
/*
|
2002-04-08 02:42:55 +00:00
|
|
|
* We can get here for two reasons:
|
2002-03-19 09:11:49 +00:00
|
|
|
*
|
|
|
|
* 1) The buckets are NULL
|
2002-04-08 02:42:55 +00:00
|
|
|
* 2) The alloc and free buckets are both somewhat full.
|
Modify UMA to use critical sections to protect per-CPU caches, rather than
mutexes, which offers lower overhead on both UP and SMP. When allocating
from or freeing to the per-cpu cache, without INVARIANTS enabled, we now
no longer perform any mutex operations, which offers a 1%-3% performance
improvement in a variety of micro-benchmarks. We rely on critical
sections to prevent (a) preemption resulting in reentrant access to UMA on
a single CPU, and (b) migration of the thread during access. In the event
we need to go back to the zone for a new bucket, we release the critical
section to acquire the global zone mutex, and must re-acquire the critical
section and re-evaluate which cache we are accessing in case migration has
occured, or circumstances have changed in the current cache.
Per-CPU cache statistics are now gathered lock-free by the sysctl, which
can result in small races in statistics reporting for caches.
Reviewed by: bmilekic, jeff (somewhat)
Tested by: rwatson, kris, gnn, scottl, mike at sentex dot net, others
2005-04-29 18:56:36 +00:00
|
|
|
*
|
|
|
|
* We must go back the zone, which requires acquiring the zone lock,
|
|
|
|
* which in turn means we must release and re-acquire the critical
|
|
|
|
* section. Since the critical section is released, we may be
|
|
|
|
* preempted or migrate. As such, make sure not to maintain any
|
|
|
|
* thread-local state specific to the cache from prior to releasing
|
|
|
|
* the critical section.
|
2002-03-19 09:11:49 +00:00
|
|
|
*/
|
Modify UMA to use critical sections to protect per-CPU caches, rather than
mutexes, which offers lower overhead on both UP and SMP. When allocating
from or freeing to the per-cpu cache, without INVARIANTS enabled, we now
no longer perform any mutex operations, which offers a 1%-3% performance
improvement in a variety of micro-benchmarks. We rely on critical
sections to prevent (a) preemption resulting in reentrant access to UMA on
a single CPU, and (b) migration of the thread during access. In the event
we need to go back to the zone for a new bucket, we release the critical
section to acquire the global zone mutex, and must re-acquire the critical
section and re-evaluate which cache we are accessing in case migration has
occured, or circumstances have changed in the current cache.
Per-CPU cache statistics are now gathered lock-free by the sysctl, which
can result in small races in statistics reporting for caches.
Reviewed by: bmilekic, jeff (somewhat)
Tested by: rwatson, kris, gnn, scottl, mike at sentex dot net, others
2005-04-29 18:56:36 +00:00
|
|
|
critical_exit();
|
2002-03-19 09:11:49 +00:00
|
|
|
ZONE_LOCK(zone);
|
Modify UMA to use critical sections to protect per-CPU caches, rather than
mutexes, which offers lower overhead on both UP and SMP. When allocating
from or freeing to the per-cpu cache, without INVARIANTS enabled, we now
no longer perform any mutex operations, which offers a 1%-3% performance
improvement in a variety of micro-benchmarks. We rely on critical
sections to prevent (a) preemption resulting in reentrant access to UMA on
a single CPU, and (b) migration of the thread during access. In the event
we need to go back to the zone for a new bucket, we release the critical
section to acquire the global zone mutex, and must re-acquire the critical
section and re-evaluate which cache we are accessing in case migration has
occured, or circumstances have changed in the current cache.
Per-CPU cache statistics are now gathered lock-free by the sysctl, which
can result in small races in statistics reporting for caches.
Reviewed by: bmilekic, jeff (somewhat)
Tested by: rwatson, kris, gnn, scottl, mike at sentex dot net, others
2005-04-29 18:56:36 +00:00
|
|
|
critical_enter();
|
|
|
|
cpu = curcpu;
|
|
|
|
cache = &zone->uz_cpu[cpu];
|
|
|
|
if (cache->uc_freebucket != NULL) {
|
|
|
|
if (cache->uc_freebucket->ub_cnt <
|
|
|
|
cache->uc_freebucket->ub_entries) {
|
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
goto zfree_start;
|
|
|
|
}
|
|
|
|
if (cache->uc_allocbucket != NULL &&
|
|
|
|
(cache->uc_allocbucket->ub_cnt <
|
|
|
|
cache->uc_freebucket->ub_cnt)) {
|
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
goto zfree_start;
|
|
|
|
}
|
|
|
|
}
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2005-07-20 18:47:42 +00:00
|
|
|
/* Since we have locked the zone we may as well send back our stats */
|
|
|
|
zone->uz_allocs += cache->uc_allocs;
|
|
|
|
cache->uc_allocs = 0;
|
|
|
|
zone->uz_frees += cache->uc_frees;
|
|
|
|
cache->uc_frees = 0;
|
|
|
|
|
2002-04-08 02:42:55 +00:00
|
|
|
bucket = cache->uc_freebucket;
|
|
|
|
cache->uc_freebucket = NULL;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2002-04-08 02:42:55 +00:00
|
|
|
/* Can we throw this on the zone full list? */
|
|
|
|
if (bucket != NULL) {
|
2002-03-19 09:11:49 +00:00
|
|
|
#ifdef UMA_DEBUG_ALLOC
|
2002-04-08 02:42:55 +00:00
|
|
|
printf("uma_zfree: Putting old bucket on the free list.\n");
|
2002-03-19 09:11:49 +00:00
|
|
|
#endif
|
2003-09-19 06:26:45 +00:00
|
|
|
/* ub_cnt is pointing to the last free item */
|
|
|
|
KASSERT(bucket->ub_cnt != 0,
|
2002-04-08 02:42:55 +00:00
|
|
|
("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
|
|
|
|
LIST_INSERT_HEAD(&zone->uz_full_bucket,
|
|
|
|
bucket, ub_link);
|
|
|
|
}
|
|
|
|
if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
|
|
|
|
LIST_REMOVE(bucket, ub_link);
|
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
cache->uc_freebucket = bucket;
|
|
|
|
goto zfree_start;
|
|
|
|
}
|
Modify UMA to use critical sections to protect per-CPU caches, rather than
mutexes, which offers lower overhead on both UP and SMP. When allocating
from or freeing to the per-cpu cache, without INVARIANTS enabled, we now
no longer perform any mutex operations, which offers a 1%-3% performance
improvement in a variety of micro-benchmarks. We rely on critical
sections to prevent (a) preemption resulting in reentrant access to UMA on
a single CPU, and (b) migration of the thread during access. In the event
we need to go back to the zone for a new bucket, we release the critical
section to acquire the global zone mutex, and must re-acquire the critical
section and re-evaluate which cache we are accessing in case migration has
occured, or circumstances have changed in the current cache.
Per-CPU cache statistics are now gathered lock-free by the sysctl, which
can result in small races in statistics reporting for caches.
Reviewed by: bmilekic, jeff (somewhat)
Tested by: rwatson, kris, gnn, scottl, mike at sentex dot net, others
2005-04-29 18:56:36 +00:00
|
|
|
/* We are no longer associated with this CPU. */
|
|
|
|
critical_exit();
|
2002-04-08 02:42:55 +00:00
|
|
|
|
|
|
|
/* And the zone.. */
|
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
#ifdef UMA_DEBUG_ALLOC
|
2002-04-08 02:42:55 +00:00
|
|
|
printf("uma_zfree: Allocating new free bucket.\n");
|
2002-03-19 09:11:49 +00:00
|
|
|
#endif
|
2002-06-17 23:53:58 +00:00
|
|
|
bflags = M_NOWAIT;
|
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
|
2002-06-17 23:53:58 +00:00
|
|
|
bflags |= M_NOVM;
|
2003-09-19 06:26:45 +00:00
|
|
|
bucket = bucket_alloc(zone->uz_count, bflags);
|
2002-06-17 23:53:58 +00:00
|
|
|
if (bucket) {
|
2002-04-08 02:42:55 +00:00
|
|
|
ZONE_LOCK(zone);
|
|
|
|
LIST_INSERT_HEAD(&zone->uz_free_bucket,
|
|
|
|
bucket, ub_link);
|
2002-03-19 09:11:49 +00:00
|
|
|
ZONE_UNLOCK(zone);
|
2002-04-08 02:42:55 +00:00
|
|
|
goto zfree_restart;
|
|
|
|
}
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2002-04-08 02:42:55 +00:00
|
|
|
/*
|
|
|
|
* If nothing else caught this, we'll just do an internal free.
|
|
|
|
*/
|
2002-04-14 01:56:25 +00:00
|
|
|
zfree_internal:
|
2009-01-25 09:11:24 +00:00
|
|
|
zone_free_item(zone, item, udata, SKIP_DTOR, ZFREE_STATFREE);
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Frees an item to an INTERNAL zone or allocates a free bucket
|
|
|
|
*
|
|
|
|
* Arguments:
|
|
|
|
* zone The zone to free to
|
|
|
|
* item The item we're freeing
|
|
|
|
* udata User supplied data for the dtor
|
2004-08-02 00:18:36 +00:00
|
|
|
* skip Skip dtors and finis
|
2002-03-19 09:11:49 +00:00
|
|
|
*/
|
|
|
|
static void
|
2009-01-25 09:11:24 +00:00
|
|
|
zone_free_item(uma_zone_t zone, void *item, void *udata,
|
2005-07-15 23:34:39 +00:00
|
|
|
enum zfreeskip skip, int flags)
|
2002-03-19 09:11:49 +00:00
|
|
|
{
|
|
|
|
uma_slab_t slab;
|
Fix critical stability problems that can cause UMA mbuf cluster
state management corruption, mbuf leaks, general mbuf corruption,
and at least on i386 a first level splash damage radius that
encompasses up to about half a megabyte of the memory after
an mbuf cluster's allocation slab. In short, this has caused
instability nightmares anywhere the right kind of network traffic
is present.
When the polymorphic refcount slabs were added to UMA, the new types
were not used pervasively. In particular, the slab management
structure was turned into one for refcounts, and one for non-refcounts
(supposed to be mostly like the old slab management structure),
but the latter was almost always used through out. In general, every
access to zones with UMA_ZONE_REFCNT turned on corrupted the
"next free" slab offset offset and the refcount with each other and
with other allocations (on i386, 2 mbuf clusters per 4096 byte slab).
Fix things so that the right type is used to access refcounted zones
where it was not before. There are additional errors in gross
overestimation of padding, it seems, that would cause a large kegs
(nee zones) to be allocated when small ones would do. Unless I have
analyzed this incorrectly, it is not directly harmful.
2004-10-08 20:19:29 +00:00
|
|
|
uma_slabrefcnt_t slabref;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
uma_keg_t keg;
|
2002-03-19 09:11:49 +00:00
|
|
|
u_int8_t *mem;
|
|
|
|
u_int8_t freei;
|
2009-01-25 09:11:24 +00:00
|
|
|
int clearfull;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
|
2004-08-02 00:18:36 +00:00
|
|
|
if (skip < SKIP_DTOR && zone->uz_dtor)
|
2009-01-25 09:11:24 +00:00
|
|
|
zone->uz_dtor(item, zone->uz_size, udata);
|
|
|
|
|
2004-08-02 00:18:36 +00:00
|
|
|
if (skip < SKIP_FINI && zone->uz_fini)
|
2009-01-25 09:11:24 +00:00
|
|
|
zone->uz_fini(item, zone->uz_size);
|
2002-10-24 06:17:30 +00:00
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
ZONE_LOCK(zone);
|
|
|
|
|
2005-07-15 23:34:39 +00:00
|
|
|
if (flags & ZFREE_STATFAIL)
|
|
|
|
zone->uz_fails++;
|
2005-07-20 18:47:42 +00:00
|
|
|
if (flags & ZFREE_STATFREE)
|
|
|
|
zone->uz_frees++;
|
2005-07-15 23:34:39 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
|
2002-03-19 09:11:49 +00:00
|
|
|
mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
|
2009-01-25 09:11:24 +00:00
|
|
|
keg = zone_first_keg(zone); /* Must only be one. */
|
|
|
|
if (zone->uz_flags & UMA_ZONE_HASH) {
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
slab = hash_sfind(&keg->uk_hash, mem);
|
2009-01-25 09:11:24 +00:00
|
|
|
} else {
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
mem += keg->uk_pgoff;
|
2002-03-19 09:11:49 +00:00
|
|
|
slab = (uma_slab_t)mem;
|
|
|
|
}
|
|
|
|
} else {
|
2009-01-25 09:11:24 +00:00
|
|
|
/* This prevents redundant lookups via free(). */
|
|
|
|
if ((zone->uz_flags & UMA_ZONE_MALLOC) && udata != NULL)
|
|
|
|
slab = (uma_slab_t)udata;
|
|
|
|
else
|
|
|
|
slab = vtoslab((vm_offset_t)item);
|
|
|
|
keg = slab->us_keg;
|
|
|
|
keg_relock(keg, zone);
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
2009-01-25 09:11:24 +00:00
|
|
|
MPASS(keg == slab->us_keg);
|
2002-03-19 09:11:49 +00:00
|
|
|
|
|
|
|
/* Do we need to remove from any lists? */
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (slab->us_freecount+1 == keg->uk_ipers) {
|
2003-06-09 22:51:36 +00:00
|
|
|
LIST_REMOVE(slab, us_link);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
|
2002-03-19 09:11:49 +00:00
|
|
|
} else if (slab->us_freecount == 0) {
|
|
|
|
LIST_REMOVE(slab, us_link);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
|
2004-01-30 16:26:29 +00:00
|
|
|
/* Slab management stuff */
|
2002-03-19 09:11:49 +00:00
|
|
|
freei = ((unsigned long)item - (unsigned long)slab->us_data)
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
/ keg->uk_rsize;
|
2002-05-02 02:08:48 +00:00
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
#ifdef INVARIANTS
|
2002-05-02 02:08:48 +00:00
|
|
|
if (!skip)
|
|
|
|
uma_dbg_free(zone, slab, item);
|
2002-03-19 09:11:49 +00:00
|
|
|
#endif
|
2002-05-02 02:08:48 +00:00
|
|
|
|
Fix critical stability problems that can cause UMA mbuf cluster
state management corruption, mbuf leaks, general mbuf corruption,
and at least on i386 a first level splash damage radius that
encompasses up to about half a megabyte of the memory after
an mbuf cluster's allocation slab. In short, this has caused
instability nightmares anywhere the right kind of network traffic
is present.
When the polymorphic refcount slabs were added to UMA, the new types
were not used pervasively. In particular, the slab management
structure was turned into one for refcounts, and one for non-refcounts
(supposed to be mostly like the old slab management structure),
but the latter was almost always used through out. In general, every
access to zones with UMA_ZONE_REFCNT turned on corrupted the
"next free" slab offset offset and the refcount with each other and
with other allocations (on i386, 2 mbuf clusters per 4096 byte slab).
Fix things so that the right type is used to access refcounted zones
where it was not before. There are additional errors in gross
overestimation of padding, it seems, that would cause a large kegs
(nee zones) to be allocated when small ones would do. Unless I have
analyzed this incorrectly, it is not directly harmful.
2004-10-08 20:19:29 +00:00
|
|
|
if (keg->uk_flags & UMA_ZONE_REFCNT) {
|
|
|
|
slabref = (uma_slabrefcnt_t)slab;
|
|
|
|
slabref->us_freelist[freei].us_item = slab->us_firstfree;
|
|
|
|
} else {
|
|
|
|
slab->us_freelist[freei].us_item = slab->us_firstfree;
|
|
|
|
}
|
2002-03-19 09:11:49 +00:00
|
|
|
slab->us_firstfree = freei;
|
|
|
|
slab->us_freecount++;
|
|
|
|
|
|
|
|
/* Zone statistics */
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
keg->uk_free++;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
clearfull = 0;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (keg->uk_flags & UMA_ZFLAG_FULL) {
|
2009-01-25 09:11:24 +00:00
|
|
|
if (keg->uk_pages < keg->uk_maxpages) {
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
keg->uk_flags &= ~UMA_ZFLAG_FULL;
|
2009-01-25 09:11:24 +00:00
|
|
|
clearfull = 1;
|
|
|
|
}
|
2002-04-14 01:56:25 +00:00
|
|
|
|
2007-01-24 22:49:11 +00:00
|
|
|
/*
|
|
|
|
* We can handle one more allocation. Since we're clearing ZFLAG_FULL,
|
|
|
|
* wake up all procs blocked on pages. This should be uncommon, so
|
|
|
|
* keeping this simple for now (rather than adding count of blocked
|
|
|
|
* threads etc).
|
|
|
|
*/
|
|
|
|
wakeup(keg);
|
2002-04-14 01:56:25 +00:00
|
|
|
}
|
2009-01-25 09:11:24 +00:00
|
|
|
if (clearfull) {
|
|
|
|
zone_relock(zone, keg);
|
|
|
|
zone->uz_flags &= ~UMA_ZFLAG_FULL;
|
|
|
|
wakeup(zone);
|
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
} else
|
|
|
|
KEG_UNLOCK(keg);
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
|
2002-03-20 05:28:34 +00:00
|
|
|
/* See uma.h */
|
2010-10-16 04:41:45 +00:00
|
|
|
int
|
2002-03-20 05:28:34 +00:00
|
|
|
uma_zone_set_max(uma_zone_t zone, int nitems)
|
|
|
|
{
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
uma_keg_t keg;
|
|
|
|
|
2002-03-20 05:28:34 +00:00
|
|
|
ZONE_LOCK(zone);
|
2009-01-25 09:11:24 +00:00
|
|
|
keg = zone_first_keg(zone);
|
|
|
|
keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (keg->uk_maxpages * keg->uk_ipers < nitems)
|
2009-01-25 09:11:24 +00:00
|
|
|
keg->uk_maxpages += keg->uk_ppera;
|
2010-10-16 04:41:45 +00:00
|
|
|
nitems = keg->uk_maxpages * keg->uk_ipers;
|
2002-03-20 05:28:34 +00:00
|
|
|
ZONE_UNLOCK(zone);
|
2010-10-16 04:41:45 +00:00
|
|
|
|
|
|
|
return (nitems);
|
2002-03-20 05:28:34 +00:00
|
|
|
}
|
|
|
|
|
2010-08-16 14:24:00 +00:00
|
|
|
/* See uma.h */
|
|
|
|
int
|
|
|
|
uma_zone_get_max(uma_zone_t zone)
|
|
|
|
{
|
|
|
|
int nitems;
|
|
|
|
uma_keg_t keg;
|
|
|
|
|
|
|
|
ZONE_LOCK(zone);
|
|
|
|
keg = zone_first_keg(zone);
|
2010-10-16 04:14:45 +00:00
|
|
|
nitems = keg->uk_maxpages * keg->uk_ipers;
|
2010-08-16 14:24:00 +00:00
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
|
|
|
|
return (nitems);
|
|
|
|
}
|
|
|
|
|
2010-10-16 04:14:45 +00:00
|
|
|
/* See uma.h */
|
|
|
|
int
|
|
|
|
uma_zone_get_cur(uma_zone_t zone)
|
|
|
|
{
|
|
|
|
int64_t nitems;
|
|
|
|
u_int i;
|
|
|
|
|
|
|
|
ZONE_LOCK(zone);
|
|
|
|
nitems = zone->uz_allocs - zone->uz_frees;
|
|
|
|
CPU_FOREACH(i) {
|
|
|
|
/*
|
|
|
|
* See the comment in sysctl_vm_zone_stats() regarding the
|
|
|
|
* safety of accessing the per-cpu caches. With the zone lock
|
|
|
|
* held, it is safe, but can potentially result in stale data.
|
|
|
|
*/
|
|
|
|
nitems += zone->uz_cpu[i].uc_allocs -
|
|
|
|
zone->uz_cpu[i].uc_frees;
|
|
|
|
}
|
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
|
|
|
|
return (nitems < 0 ? 0 : nitems);
|
|
|
|
}
|
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
/* See uma.h */
|
|
|
|
void
|
|
|
|
uma_zone_set_init(uma_zone_t zone, uma_init uminit)
|
|
|
|
{
|
2009-01-25 09:11:24 +00:00
|
|
|
uma_keg_t keg;
|
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
ZONE_LOCK(zone);
|
2009-01-25 09:11:24 +00:00
|
|
|
keg = zone_first_keg(zone);
|
|
|
|
KASSERT(keg->uk_pages == 0,
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
("uma_zone_set_init on non-empty keg"));
|
2009-01-25 09:11:24 +00:00
|
|
|
keg->uk_init = uminit;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* See uma.h */
|
|
|
|
void
|
|
|
|
uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
|
|
|
|
{
|
2009-01-25 09:11:24 +00:00
|
|
|
uma_keg_t keg;
|
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
ZONE_LOCK(zone);
|
2009-01-25 09:11:24 +00:00
|
|
|
keg = zone_first_keg(zone);
|
|
|
|
KASSERT(keg->uk_pages == 0,
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
("uma_zone_set_fini on non-empty keg"));
|
2009-01-25 09:11:24 +00:00
|
|
|
keg->uk_fini = fini;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* See uma.h */
|
|
|
|
void
|
|
|
|
uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
|
|
|
|
{
|
|
|
|
ZONE_LOCK(zone);
|
2009-01-25 09:11:24 +00:00
|
|
|
KASSERT(zone_first_keg(zone)->uk_pages == 0,
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
("uma_zone_set_zinit on non-empty keg"));
|
|
|
|
zone->uz_init = zinit;
|
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* See uma.h */
|
|
|
|
void
|
|
|
|
uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
|
|
|
|
{
|
|
|
|
ZONE_LOCK(zone);
|
2009-01-25 09:11:24 +00:00
|
|
|
KASSERT(zone_first_keg(zone)->uk_pages == 0,
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
("uma_zone_set_zfini on non-empty keg"));
|
|
|
|
zone->uz_fini = zfini;
|
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
}
|
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
/* See uma.h */
|
2004-08-02 00:18:36 +00:00
|
|
|
/* XXX uk_freef is not actually used with the zone locked */
|
2002-03-19 09:11:49 +00:00
|
|
|
void
|
|
|
|
uma_zone_set_freef(uma_zone_t zone, uma_free freef)
|
|
|
|
{
|
2009-01-25 09:11:24 +00:00
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
ZONE_LOCK(zone);
|
2009-01-25 09:11:24 +00:00
|
|
|
zone_first_keg(zone)->uk_freef = freef;
|
2002-03-19 09:11:49 +00:00
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* See uma.h */
|
2004-08-02 00:18:36 +00:00
|
|
|
/* XXX uk_allocf is not actually used with the zone locked */
|
2002-03-19 09:11:49 +00:00
|
|
|
void
|
|
|
|
uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
|
|
|
|
{
|
2009-01-25 09:11:24 +00:00
|
|
|
uma_keg_t keg;
|
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
ZONE_LOCK(zone);
|
2009-01-25 09:11:24 +00:00
|
|
|
keg = zone_first_keg(zone);
|
|
|
|
keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
|
|
|
|
keg->uk_allocf = allocf;
|
2002-03-19 09:11:49 +00:00
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* See uma.h */
|
|
|
|
int
|
|
|
|
uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
|
|
|
|
{
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
uma_keg_t keg;
|
2002-03-19 09:11:49 +00:00
|
|
|
vm_offset_t kva;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
int pages;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
keg = zone_first_keg(zone);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
pages = count / keg->uk_ipers;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
if (pages * keg->uk_ipers < count)
|
2002-03-19 09:11:49 +00:00
|
|
|
pages++;
|
|
|
|
|
2004-07-22 19:44:49 +00:00
|
|
|
kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE);
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2003-11-14 17:49:07 +00:00
|
|
|
if (kva == 0)
|
2002-03-19 09:11:49 +00:00
|
|
|
return (0);
|
2010-05-03 17:35:31 +00:00
|
|
|
if (obj == NULL)
|
|
|
|
obj = vm_object_allocate(OBJT_PHYS, pages);
|
|
|
|
else {
|
2004-07-22 19:44:49 +00:00
|
|
|
VM_OBJECT_LOCK_INIT(obj, "uma object");
|
2010-05-03 17:35:31 +00:00
|
|
|
_vm_object_allocate(OBJT_PHYS, pages, obj);
|
2003-04-28 06:11:32 +00:00
|
|
|
}
|
2002-04-08 02:42:55 +00:00
|
|
|
ZONE_LOCK(zone);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
keg->uk_kva = kva;
|
|
|
|
keg->uk_obj = obj;
|
|
|
|
keg->uk_maxpages = pages;
|
|
|
|
keg->uk_allocf = obj_alloc;
|
|
|
|
keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
|
2002-03-19 09:11:49 +00:00
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* See uma.h */
|
|
|
|
void
|
|
|
|
uma_prealloc(uma_zone_t zone, int items)
|
|
|
|
{
|
|
|
|
int slabs;
|
|
|
|
uma_slab_t slab;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
uma_keg_t keg;
|
2002-03-19 09:11:49 +00:00
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
keg = zone_first_keg(zone);
|
2002-03-19 09:11:49 +00:00
|
|
|
ZONE_LOCK(zone);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
slabs = items / keg->uk_ipers;
|
|
|
|
if (slabs * keg->uk_ipers < items)
|
2002-03-19 09:11:49 +00:00
|
|
|
slabs++;
|
|
|
|
while (slabs > 0) {
|
2009-01-25 09:11:24 +00:00
|
|
|
slab = keg_alloc_slab(keg, zone, M_WAITOK);
|
|
|
|
if (slab == NULL)
|
|
|
|
break;
|
|
|
|
MPASS(slab->us_keg == keg);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
|
2002-03-19 09:11:49 +00:00
|
|
|
slabs--;
|
|
|
|
}
|
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
}
|
|
|
|
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
/* See uma.h */
|
|
|
|
u_int32_t *
|
|
|
|
uma_find_refcnt(uma_zone_t zone, void *item)
|
|
|
|
{
|
Fix critical stability problems that can cause UMA mbuf cluster
state management corruption, mbuf leaks, general mbuf corruption,
and at least on i386 a first level splash damage radius that
encompasses up to about half a megabyte of the memory after
an mbuf cluster's allocation slab. In short, this has caused
instability nightmares anywhere the right kind of network traffic
is present.
When the polymorphic refcount slabs were added to UMA, the new types
were not used pervasively. In particular, the slab management
structure was turned into one for refcounts, and one for non-refcounts
(supposed to be mostly like the old slab management structure),
but the latter was almost always used through out. In general, every
access to zones with UMA_ZONE_REFCNT turned on corrupted the
"next free" slab offset offset and the refcount with each other and
with other allocations (on i386, 2 mbuf clusters per 4096 byte slab).
Fix things so that the right type is used to access refcounted zones
where it was not before. There are additional errors in gross
overestimation of padding, it seems, that would cause a large kegs
(nee zones) to be allocated when small ones would do. Unless I have
analyzed this incorrectly, it is not directly harmful.
2004-10-08 20:19:29 +00:00
|
|
|
uma_slabrefcnt_t slabref;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
uma_keg_t keg;
|
|
|
|
u_int32_t *refcnt;
|
|
|
|
int idx;
|
|
|
|
|
Fix critical stability problems that can cause UMA mbuf cluster
state management corruption, mbuf leaks, general mbuf corruption,
and at least on i386 a first level splash damage radius that
encompasses up to about half a megabyte of the memory after
an mbuf cluster's allocation slab. In short, this has caused
instability nightmares anywhere the right kind of network traffic
is present.
When the polymorphic refcount slabs were added to UMA, the new types
were not used pervasively. In particular, the slab management
structure was turned into one for refcounts, and one for non-refcounts
(supposed to be mostly like the old slab management structure),
but the latter was almost always used through out. In general, every
access to zones with UMA_ZONE_REFCNT turned on corrupted the
"next free" slab offset offset and the refcount with each other and
with other allocations (on i386, 2 mbuf clusters per 4096 byte slab).
Fix things so that the right type is used to access refcounted zones
where it was not before. There are additional errors in gross
overestimation of padding, it seems, that would cause a large kegs
(nee zones) to be allocated when small ones would do. Unless I have
analyzed this incorrectly, it is not directly harmful.
2004-10-08 20:19:29 +00:00
|
|
|
slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item &
|
|
|
|
(~UMA_SLAB_MASK));
|
2009-01-25 09:11:24 +00:00
|
|
|
keg = slabref->us_keg;
|
Fix critical stability problems that can cause UMA mbuf cluster
state management corruption, mbuf leaks, general mbuf corruption,
and at least on i386 a first level splash damage radius that
encompasses up to about half a megabyte of the memory after
an mbuf cluster's allocation slab. In short, this has caused
instability nightmares anywhere the right kind of network traffic
is present.
When the polymorphic refcount slabs were added to UMA, the new types
were not used pervasively. In particular, the slab management
structure was turned into one for refcounts, and one for non-refcounts
(supposed to be mostly like the old slab management structure),
but the latter was almost always used through out. In general, every
access to zones with UMA_ZONE_REFCNT turned on corrupted the
"next free" slab offset offset and the refcount with each other and
with other allocations (on i386, 2 mbuf clusters per 4096 byte slab).
Fix things so that the right type is used to access refcounted zones
where it was not before. There are additional errors in gross
overestimation of padding, it seems, that would cause a large kegs
(nee zones) to be allocated when small ones would do. Unless I have
analyzed this incorrectly, it is not directly harmful.
2004-10-08 20:19:29 +00:00
|
|
|
KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT,
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
|
Fix critical stability problems that can cause UMA mbuf cluster
state management corruption, mbuf leaks, general mbuf corruption,
and at least on i386 a first level splash damage radius that
encompasses up to about half a megabyte of the memory after
an mbuf cluster's allocation slab. In short, this has caused
instability nightmares anywhere the right kind of network traffic
is present.
When the polymorphic refcount slabs were added to UMA, the new types
were not used pervasively. In particular, the slab management
structure was turned into one for refcounts, and one for non-refcounts
(supposed to be mostly like the old slab management structure),
but the latter was almost always used through out. In general, every
access to zones with UMA_ZONE_REFCNT turned on corrupted the
"next free" slab offset offset and the refcount with each other and
with other allocations (on i386, 2 mbuf clusters per 4096 byte slab).
Fix things so that the right type is used to access refcounted zones
where it was not before. There are additional errors in gross
overestimation of padding, it seems, that would cause a large kegs
(nee zones) to be allocated when small ones would do. Unless I have
analyzed this incorrectly, it is not directly harmful.
2004-10-08 20:19:29 +00:00
|
|
|
idx = ((unsigned long)item - (unsigned long)slabref->us_data)
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
/ keg->uk_rsize;
|
Fix critical stability problems that can cause UMA mbuf cluster
state management corruption, mbuf leaks, general mbuf corruption,
and at least on i386 a first level splash damage radius that
encompasses up to about half a megabyte of the memory after
an mbuf cluster's allocation slab. In short, this has caused
instability nightmares anywhere the right kind of network traffic
is present.
When the polymorphic refcount slabs were added to UMA, the new types
were not used pervasively. In particular, the slab management
structure was turned into one for refcounts, and one for non-refcounts
(supposed to be mostly like the old slab management structure),
but the latter was almost always used through out. In general, every
access to zones with UMA_ZONE_REFCNT turned on corrupted the
"next free" slab offset offset and the refcount with each other and
with other allocations (on i386, 2 mbuf clusters per 4096 byte slab).
Fix things so that the right type is used to access refcounted zones
where it was not before. There are additional errors in gross
overestimation of padding, it seems, that would cause a large kegs
(nee zones) to be allocated when small ones would do. Unless I have
analyzed this incorrectly, it is not directly harmful.
2004-10-08 20:19:29 +00:00
|
|
|
refcnt = &slabref->us_freelist[idx].us_refcnt;
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
return refcnt;
|
|
|
|
}
|
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
/* See uma.h */
|
|
|
|
void
|
|
|
|
uma_reclaim(void)
|
|
|
|
{
|
|
|
|
#ifdef UMA_DEBUG
|
|
|
|
printf("UMA: vm asked us to release pages!\n");
|
|
|
|
#endif
|
2002-04-08 06:20:34 +00:00
|
|
|
bucket_enable();
|
2002-03-19 09:11:49 +00:00
|
|
|
zone_foreach(zone_drain);
|
|
|
|
/*
|
|
|
|
* Some slabs may have been freed but this zone will be visited early
|
|
|
|
* we visit again so that we can free pages that are empty once other
|
|
|
|
* zones are drained. We have to do the same for buckets.
|
|
|
|
*/
|
2003-09-19 23:27:46 +00:00
|
|
|
zone_drain(slabzone);
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
zone_drain(slabrefzone);
|
2003-09-19 06:26:45 +00:00
|
|
|
bucket_zone_drain();
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
|
2007-01-05 19:09:01 +00:00
|
|
|
/* See uma.h */
|
|
|
|
int
|
|
|
|
uma_zone_exhausted(uma_zone_t zone)
|
|
|
|
{
|
|
|
|
int full;
|
|
|
|
|
|
|
|
ZONE_LOCK(zone);
|
2009-01-25 09:11:24 +00:00
|
|
|
full = (zone->uz_flags & UMA_ZFLAG_FULL);
|
2007-01-05 19:09:01 +00:00
|
|
|
ZONE_UNLOCK(zone);
|
|
|
|
return (full);
|
|
|
|
}
|
|
|
|
|
2007-01-25 01:05:23 +00:00
|
|
|
int
|
|
|
|
uma_zone_exhausted_nolock(uma_zone_t zone)
|
|
|
|
{
|
2009-01-25 09:11:24 +00:00
|
|
|
return (zone->uz_flags & UMA_ZFLAG_FULL);
|
2007-01-25 01:05:23 +00:00
|
|
|
}
|
|
|
|
|
2002-03-19 09:11:49 +00:00
|
|
|
void *
|
|
|
|
uma_large_malloc(int size, int wait)
|
|
|
|
{
|
|
|
|
void *mem;
|
|
|
|
uma_slab_t slab;
|
|
|
|
u_int8_t flags;
|
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
slab = zone_alloc_item(slabzone, NULL, wait);
|
2002-03-19 09:11:49 +00:00
|
|
|
if (slab == NULL)
|
|
|
|
return (NULL);
|
2003-07-26 03:55:32 +00:00
|
|
|
mem = page_alloc(NULL, size, &flags, wait);
|
2002-03-19 09:11:49 +00:00
|
|
|
if (mem) {
|
2002-09-18 08:26:30 +00:00
|
|
|
vsetslab((vm_offset_t)mem, slab);
|
2002-03-19 09:11:49 +00:00
|
|
|
slab->us_data = mem;
|
|
|
|
slab->us_flags = flags | UMA_SLAB_MALLOC;
|
|
|
|
slab->us_size = size;
|
|
|
|
} else {
|
2009-01-25 09:11:24 +00:00
|
|
|
zone_free_item(slabzone, slab, NULL, SKIP_NONE,
|
2005-07-20 18:47:42 +00:00
|
|
|
ZFREE_STATFAIL | ZFREE_STATFREE);
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return (mem);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
uma_large_free(uma_slab_t slab)
|
|
|
|
{
|
2002-09-18 08:26:30 +00:00
|
|
|
vsetobj((vm_offset_t)slab->us_data, kmem_object);
|
2004-03-10 04:44:43 +00:00
|
|
|
page_free(slab->us_data, slab->us_size, slab->us_flags);
|
2009-01-25 09:11:24 +00:00
|
|
|
zone_free_item(slabzone, slab, NULL, SKIP_NONE, ZFREE_STATFREE);
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
uma_print_stats(void)
|
|
|
|
{
|
|
|
|
zone_foreach(uma_print_zone);
|
|
|
|
}
|
|
|
|
|
2003-11-30 08:04:01 +00:00
|
|
|
static void
|
|
|
|
slab_print(uma_slab_t slab)
|
|
|
|
{
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
printf("slab: keg %p, data %p, freecount %d, firstfree %d\n",
|
|
|
|
slab->us_keg, slab->us_data, slab->us_freecount,
|
2003-11-30 08:04:01 +00:00
|
|
|
slab->us_firstfree);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
cache_print(uma_cache_t cache)
|
|
|
|
{
|
2004-01-30 16:26:29 +00:00
|
|
|
printf("alloc: %p(%d), free: %p(%d)\n",
|
2003-11-30 08:04:01 +00:00
|
|
|
cache->uc_allocbucket,
|
|
|
|
cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
|
|
|
|
cache->uc_freebucket,
|
|
|
|
cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
|
|
|
|
}
|
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
static void
|
|
|
|
uma_print_keg(uma_keg_t keg)
|
2002-03-19 09:11:49 +00:00
|
|
|
{
|
2003-11-30 08:04:01 +00:00
|
|
|
uma_slab_t slab;
|
|
|
|
|
2009-01-25 09:11:24 +00:00
|
|
|
printf("keg: %s(%p) size %d(%d) flags %d ipers %d ppera %d "
|
|
|
|
"out %d free %d limit %d\n",
|
|
|
|
keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
keg->uk_ipers, keg->uk_ppera,
|
2009-01-25 09:11:24 +00:00
|
|
|
(keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free,
|
|
|
|
(keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
|
2003-11-30 08:04:01 +00:00
|
|
|
printf("Part slabs:\n");
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
|
2003-11-30 08:04:01 +00:00
|
|
|
slab_print(slab);
|
|
|
|
printf("Free slabs:\n");
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
|
2003-11-30 08:04:01 +00:00
|
|
|
slab_print(slab);
|
|
|
|
printf("Full slabs:\n");
|
Bring in mbuma to replace mballoc.
mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.
Extensions to UMA worth noting:
- Better layering between slab <-> zone caches; introduce
Keg structure which splits off slab cache away from the
zone structure and allows multiple zones to be stacked
on top of a single Keg (single type of slab cache);
perhaps we should look into defining a subset API on
top of the Keg for special use by malloc(9),
for example.
- UMA_ZONE_REFCNT zones can now be added, and reference
counters automagically allocated for them within the end
of the associated slab structures. uma_find_refcnt()
does a kextract to fetch the slab struct reference from
the underlying page, and lookup the corresponding refcnt.
mbuma things worth noting:
- integrates mbuf & cluster allocations with extended UMA
and provides caches for commonly-allocated items; defines
several zones (two primary, one secondary) and two kegs.
- change up certain code paths that always used to do:
m_get() + m_clget() to instead just use m_getcl() and
try to take advantage of the newly defined secondary
Packet zone.
- netstat(1) and systat(1) quickly hacked up to do basic
stat reporting but additional stats work needs to be
done once some other details within UMA have been taken
care of and it becomes clearer to how stats will work
within the modified framework.
From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used. The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
Additional things worth noting/known issues (READ):
- One report of 'ips' (ServeRAID) driver acting really
slow in conjunction with mbuma. Need more data.
Latest report is that ips is equally sucking with
and without mbuma.
- Giant leak in NFS code sometimes occurs, can't
reproduce but currently analyzing; brueffer is
able to reproduce but THIS IS NOT an mbuma-specific
problem and currently occurs even WITHOUT mbuma.
- Issues in network locking: there is at least one
code path in the rip code where one or more locks
are acquired and we end up in m_prepend() with
M_WAITOK, which causes WITNESS to whine from within
UMA. Current temporary solution: force all UMA
allocations to be M_NOWAIT from within UMA for now
to avoid deadlocks unless WITNESS is defined and we
can determine with certainty that we're not holding
any locks when we're M_WAITOK.
- I've seen at least one weird socketbuffer empty-but-
mbuf-still-attached panic. I don't believe this
to be related to mbuma but please keep your eyes
open, turn on debugging, and capture crash dumps.
This change removes more code than it adds.
A paper is available detailing the change and considering
various performance issues, it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.
Testing and Debugging:
rwatson,
brueffer,
Ketrien I. Saihr-Kesenchedra,
...
Reviewed by: Lots of people (for different parts)
2004-05-31 21:46:06 +00:00
|
|
|
LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
|
2003-11-30 08:04:01 +00:00
|
|
|
slab_print(slab);
|
2009-01-25 09:11:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
uma_print_zone(uma_zone_t zone)
|
|
|
|
{
|
|
|
|
uma_cache_t cache;
|
|
|
|
uma_klink_t kl;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
printf("zone: %s(%p) size %d flags %d\n",
|
|
|
|
zone->uz_name, zone, zone->uz_size, zone->uz_flags);
|
|
|
|
LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
|
|
|
|
uma_print_keg(kl->kl_keg);
|
2010-06-11 18:46:34 +00:00
|
|
|
CPU_FOREACH(i) {
|
2003-11-30 08:04:01 +00:00
|
|
|
cache = &zone->uz_cpu[i];
|
|
|
|
printf("CPU %d Cache:\n", i);
|
|
|
|
cache_print(cache);
|
|
|
|
}
|
2002-03-19 09:11:49 +00:00
|
|
|
}
|
|
|
|
|
2006-07-18 01:13:18 +00:00
|
|
|
#ifdef DDB
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
/*
|
|
|
|
* Generate statistics across both the zone and its per-cpu cache's. Return
|
|
|
|
* desired statistics if the pointer is non-NULL for that statistic.
|
|
|
|
*
|
|
|
|
* Note: does not update the zone statistics, as it can't safely clear the
|
|
|
|
* per-CPU cache statistic.
|
|
|
|
*
|
|
|
|
* XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
|
|
|
|
* safe from off-CPU; we should modify the caches to track this information
|
|
|
|
* directly so that we don't have to.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
uma_zone_sumstat(uma_zone_t z, int *cachefreep, u_int64_t *allocsp,
|
2010-06-15 19:28:37 +00:00
|
|
|
u_int64_t *freesp, u_int64_t *sleepsp)
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
{
|
|
|
|
uma_cache_t cache;
|
2010-06-15 19:28:37 +00:00
|
|
|
u_int64_t allocs, frees, sleeps;
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
int cachefree, cpu;
|
|
|
|
|
2010-06-15 19:28:37 +00:00
|
|
|
allocs = frees = sleeps = 0;
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
cachefree = 0;
|
2010-06-11 18:46:34 +00:00
|
|
|
CPU_FOREACH(cpu) {
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
cache = &z->uz_cpu[cpu];
|
|
|
|
if (cache->uc_allocbucket != NULL)
|
|
|
|
cachefree += cache->uc_allocbucket->ub_cnt;
|
|
|
|
if (cache->uc_freebucket != NULL)
|
|
|
|
cachefree += cache->uc_freebucket->ub_cnt;
|
|
|
|
allocs += cache->uc_allocs;
|
|
|
|
frees += cache->uc_frees;
|
|
|
|
}
|
|
|
|
allocs += z->uz_allocs;
|
|
|
|
frees += z->uz_frees;
|
2010-06-15 19:28:37 +00:00
|
|
|
sleeps += z->uz_sleeps;
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
if (cachefreep != NULL)
|
|
|
|
*cachefreep = cachefree;
|
|
|
|
if (allocsp != NULL)
|
|
|
|
*allocsp = allocs;
|
|
|
|
if (freesp != NULL)
|
|
|
|
*freesp = frees;
|
2010-06-15 19:28:37 +00:00
|
|
|
if (sleepsp != NULL)
|
|
|
|
*sleepsp = sleeps;
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
}
|
2006-07-18 01:13:18 +00:00
|
|
|
#endif /* DDB */
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
|
|
|
|
static int
|
|
|
|
sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
|
|
|
|
{
|
|
|
|
uma_keg_t kz;
|
|
|
|
uma_zone_t z;
|
|
|
|
int count;
|
|
|
|
|
|
|
|
count = 0;
|
|
|
|
mtx_lock(&uma_mtx);
|
|
|
|
LIST_FOREACH(kz, &uma_kegs, uk_link) {
|
|
|
|
LIST_FOREACH(z, &kz->uk_zones, uz_link)
|
|
|
|
count++;
|
|
|
|
}
|
|
|
|
mtx_unlock(&uma_mtx);
|
|
|
|
return (sysctl_handle_int(oidp, &count, 0, req));
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
|
|
|
|
{
|
|
|
|
struct uma_stream_header ush;
|
|
|
|
struct uma_type_header uth;
|
|
|
|
struct uma_percpu_stat ups;
|
|
|
|
uma_bucket_t bucket;
|
|
|
|
struct sbuf sbuf;
|
|
|
|
uma_cache_t cache;
|
2009-01-25 09:11:24 +00:00
|
|
|
uma_klink_t kl;
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
uma_keg_t kz;
|
|
|
|
uma_zone_t z;
|
2009-01-25 09:11:24 +00:00
|
|
|
uma_keg_t k;
|
2010-09-16 16:13:12 +00:00
|
|
|
int count, error, i;
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
|
2011-01-27 00:34:12 +00:00
|
|
|
error = sysctl_wire_old_buffer(req, 0);
|
|
|
|
if (error != 0)
|
|
|
|
return (error);
|
2010-09-16 16:13:12 +00:00
|
|
|
sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
|
2010-09-13 18:48:23 +00:00
|
|
|
|
2010-09-16 16:13:12 +00:00
|
|
|
count = 0;
|
2010-09-13 18:48:23 +00:00
|
|
|
mtx_lock(&uma_mtx);
|
|
|
|
LIST_FOREACH(kz, &uma_kegs, uk_link) {
|
|
|
|
LIST_FOREACH(z, &kz->uk_zones, uz_link)
|
2010-09-16 16:13:12 +00:00
|
|
|
count++;
|
2010-09-13 18:48:23 +00:00
|
|
|
}
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Insert stream header.
|
|
|
|
*/
|
|
|
|
bzero(&ush, sizeof(ush));
|
|
|
|
ush.ush_version = UMA_STREAM_VERSION;
|
2005-07-16 11:03:06 +00:00
|
|
|
ush.ush_maxcpus = (mp_maxid + 1);
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
ush.ush_count = count;
|
2010-09-16 16:13:12 +00:00
|
|
|
(void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
|
|
|
|
LIST_FOREACH(kz, &uma_kegs, uk_link) {
|
|
|
|
LIST_FOREACH(z, &kz->uk_zones, uz_link) {
|
|
|
|
bzero(&uth, sizeof(uth));
|
|
|
|
ZONE_LOCK(z);
|
2005-07-25 00:47:32 +00:00
|
|
|
strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
uth.uth_align = kz->uk_align;
|
|
|
|
uth.uth_size = kz->uk_size;
|
|
|
|
uth.uth_rsize = kz->uk_rsize;
|
2009-01-25 09:11:24 +00:00
|
|
|
LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
|
|
|
|
k = kl->kl_keg;
|
|
|
|
uth.uth_maxpages += k->uk_maxpages;
|
|
|
|
uth.uth_pages += k->uk_pages;
|
|
|
|
uth.uth_keg_free += k->uk_free;
|
|
|
|
uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
|
|
|
|
* k->uk_ipers;
|
|
|
|
}
|
2005-07-25 00:47:32 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* A zone is secondary is it is not the first entry
|
|
|
|
* on the keg's zone list.
|
|
|
|
*/
|
2009-01-25 09:11:24 +00:00
|
|
|
if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
|
2005-07-25 00:47:32 +00:00
|
|
|
(LIST_FIRST(&kz->uk_zones) != z))
|
|
|
|
uth.uth_zone_flags = UTH_ZONE_SECONDARY;
|
|
|
|
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
|
|
|
|
uth.uth_zone_free += bucket->ub_cnt;
|
|
|
|
uth.uth_allocs = z->uz_allocs;
|
|
|
|
uth.uth_frees = z->uz_frees;
|
2005-07-15 23:34:39 +00:00
|
|
|
uth.uth_fails = z->uz_fails;
|
2010-06-15 19:28:37 +00:00
|
|
|
uth.uth_sleeps = z->uz_sleeps;
|
2010-09-16 16:13:12 +00:00
|
|
|
(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
/*
|
2005-07-16 09:40:34 +00:00
|
|
|
* While it is not normally safe to access the cache
|
|
|
|
* bucket pointers while not on the CPU that owns the
|
|
|
|
* cache, we only allow the pointers to be exchanged
|
|
|
|
* without the zone lock held, not invalidated, so
|
|
|
|
* accept the possible race associated with bucket
|
|
|
|
* exchange during monitoring.
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
*/
|
2005-07-16 11:03:06 +00:00
|
|
|
for (i = 0; i < (mp_maxid + 1); i++) {
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
bzero(&ups, sizeof(ups));
|
|
|
|
if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
|
|
|
|
goto skip;
|
2006-02-11 19:20:56 +00:00
|
|
|
if (CPU_ABSENT(i))
|
|
|
|
goto skip;
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
cache = &z->uz_cpu[i];
|
|
|
|
if (cache->uc_allocbucket != NULL)
|
|
|
|
ups.ups_cache_free +=
|
|
|
|
cache->uc_allocbucket->ub_cnt;
|
|
|
|
if (cache->uc_freebucket != NULL)
|
|
|
|
ups.ups_cache_free +=
|
|
|
|
cache->uc_freebucket->ub_cnt;
|
|
|
|
ups.ups_allocs = cache->uc_allocs;
|
|
|
|
ups.ups_frees = cache->uc_frees;
|
|
|
|
skip:
|
2010-09-16 16:13:12 +00:00
|
|
|
(void)sbuf_bcat(&sbuf, &ups, sizeof(ups));
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
}
|
2005-07-16 09:40:34 +00:00
|
|
|
ZONE_UNLOCK(z);
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
mtx_unlock(&uma_mtx);
|
2010-09-16 16:13:12 +00:00
|
|
|
error = sbuf_finish(&sbuf);
|
|
|
|
sbuf_delete(&sbuf);
|
Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator
statistics via a binary structure stream:
- Add structure 'uma_stream_header', which defines a stream version,
definition of MAXCPUs used in the stream, and the number of zone
records in the stream.
- Add structure 'uma_type_header', which defines the name, alignment,
size, resource allocation limits, current pages allocated, preferred
bucket size, and central zone + keg statistics.
- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
includes the number of allocations and frees, as well as the number
of free items in the cache.
- When the sysctl is queried, return a stream header, followed by a
series of type descriptions, each consisting of a type header
followed by a series of MAXCPUs uma_percpu_stat structures holding
per-CPU allocation information. Typical values of MAXCPU will be
1 (UP compiled kernel) and 16 (SMP compiled kernel).
This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.
While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.
A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days. This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.
MFC after: 1 week
2005-07-14 16:35:13 +00:00
|
|
|
return (error);
|
|
|
|
}
|
2005-10-20 16:39:33 +00:00
|
|
|
|
|
|
|
#ifdef DDB
|
|
|
|
DB_SHOW_COMMAND(uma, db_show_uma)
|
|
|
|
{
|
2010-06-15 19:28:37 +00:00
|
|
|
u_int64_t allocs, frees, sleeps;
|
2005-10-20 16:39:33 +00:00
|
|
|
uma_bucket_t bucket;
|
|
|
|
uma_keg_t kz;
|
|
|
|
uma_zone_t z;
|
|
|
|
int cachefree;
|
|
|
|
|
2010-06-15 19:28:37 +00:00
|
|
|
db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
|
|
|
|
"Requests", "Sleeps");
|
2005-10-20 16:39:33 +00:00
|
|
|
LIST_FOREACH(kz, &uma_kegs, uk_link) {
|
|
|
|
LIST_FOREACH(z, &kz->uk_zones, uz_link) {
|
|
|
|
if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
|
|
|
|
allocs = z->uz_allocs;
|
|
|
|
frees = z->uz_frees;
|
2010-06-15 19:28:37 +00:00
|
|
|
sleeps = z->uz_sleeps;
|
2005-10-20 16:39:33 +00:00
|
|
|
cachefree = 0;
|
|
|
|
} else
|
|
|
|
uma_zone_sumstat(z, &cachefree, &allocs,
|
2010-06-15 19:28:37 +00:00
|
|
|
&frees, &sleeps);
|
2009-01-25 09:11:24 +00:00
|
|
|
if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
|
2005-10-20 16:39:33 +00:00
|
|
|
(LIST_FIRST(&kz->uk_zones) != z)))
|
|
|
|
cachefree += kz->uk_free;
|
|
|
|
LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
|
|
|
|
cachefree += bucket->ub_cnt;
|
2010-06-15 19:28:37 +00:00
|
|
|
db_printf("%18s %8ju %8jd %8d %12ju %8ju\n", z->uz_name,
|
2006-10-26 12:55:32 +00:00
|
|
|
(uintmax_t)kz->uk_size,
|
|
|
|
(intmax_t)(allocs - frees), cachefree,
|
2010-06-15 19:28:37 +00:00
|
|
|
(uintmax_t)allocs, sleeps);
|
2005-10-20 16:39:33 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|