Bring in mbuma to replace mballoc.

mbuma is an Mbuf & Cluster allocator built on top of a number of
extensions to the UMA framework, all included herein.

Extensions to UMA worth noting:
  - Better layering between slab <-> zone caches; introduce
    a Keg structure, which splits the slab cache off from the
    zone structure and allows multiple zones to be stacked
    on top of a single Keg (a single type of slab cache);
    perhaps we should look into defining a subset API on
    top of the Keg for special use by malloc(9),
    for example.
  - UMA_ZONE_REFCNT zones can now be added, and reference
    counters are automagically allocated for them at the end
    of the associated slab structures.  uma_find_refcnt()
    does a kextract to fetch the slab struct reference from
    the underlying page, and looks up the corresponding
    refcnt.  (A usage sketch of both extensions follows
    this list.)
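
A minimal sketch of how a hypothetical consumer could use both
extensions (stacking a secondary zone on a master zone's Keg, and
looking up a per-item refcnt); the "foo" zone names, item sizes,
and foo_* functions are made up for illustration only and are not
part of this commit:

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/malloc.h>
    #include <vm/uma.h>

    static uma_zone_t foo_zone;     /* master zone, gets its own Keg */
    static uma_zone_t foo2_zone;    /* secondary zone, shares foo_zone's Keg */
    static uma_zone_t fooref_zone;  /* refcnts stored at the end of each slab */

    static void
    foo_zones_init(void)
    {
            foo_zone = uma_zcreate("foo", 256, NULL, NULL, NULL, NULL,
                UMA_ALIGN_PTR, 0);
            /* Same Keg (slab cache) as foo_zone, separate bucket caches. */
            foo2_zone = uma_zsecond_create("foo2", NULL, NULL, NULL, NULL,
                foo_zone);
            fooref_zone = uma_zcreate("fooref", 2048, NULL, NULL, NULL, NULL,
                UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
    }

    static void
    fooref_example(void)
    {
            void *item;
            u_int32_t *refcnt;

            item = uma_zalloc(fooref_zone, M_NOWAIT);
            if (item == NULL)
                    return;
            /* Fetch the counter UMA allocated for this item in its slab. */
            refcnt = uma_find_refcnt(fooref_zone, item);
            *refcnt = 1;
            uma_zfree(fooref_zone, item);
    }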

mbuma things worth noting:
  - integrates mbuf & cluster allocations with extended UMA
    and provides caches for commonly-allocated items; defines
    three zones (two primary, one secondary) and two kegs.
  - changes certain code paths that used to do
    m_get() + m_clget() to instead just use m_getcl(), taking
    advantage of the newly defined secondary Packet zone
    (see the before/after sketch following this list).
  - netstat(1) and systat(1) are quickly hacked up to do
    basic stat reporting, but additional stats work needs to
    be done once some other details within UMA have been
    taken care of and it becomes clearer how stats will work
    within the modified framework.
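
As a concrete example of the code-path change above, the old
two-step pattern and its replacement look roughly like this (a
minimal sketch, with the surrounding error handling trimmed):

    struct mbuf *m;

    /* Before: allocate an mbuf, then separately attach a cluster. */
    MGETHDR(m, M_DONTWAIT, MT_DATA);
    if (m != NULL) {
            MCLGET(m, M_DONTWAIT);
            if ((m->m_flags & M_EXT) == 0) {
                    m_free(m);
                    m = NULL;
            }
    }

    /* After: a single allocation served from the secondary Packet zone. */
    m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);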

From the user perspective, one implication is that the
NMBCLUSTERS compile-time option is no longer used.  The
maximum number of clusters is still capped off according
to maxusers, but it can be made unlimited by setting
the kern.ipc.nmbclusters boot-time tunable to zero.
Work should be done to write an appropriate sysctl
handler allowing dynamic tuning of kern.ipc.nmbclusters
at runtime.
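
For example, the cluster cap can be adjusted (or removed) at boot
time through the usual tunable mechanism, typically via
/boot/loader.conf; the values below are purely illustrative:

    # /boot/loader.conf
    kern.ipc.nmbclusters="0"        # 0 = leave the Cluster zone uncapped
    #kern.ipc.nmbclusters="32768"   # or cap it at 32768 clusters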

Additional things worth noting/known issues (READ):
   - One report of the 'ips' (ServeRAID) driver acting really
     slowly in conjunction with mbuma.  Need more data.
     Latest report is that ips sucks equally with and
     without mbuma.
   - A Giant leak in the NFS code sometimes occurs; I can't
     reproduce it but am currently analyzing it.  brueffer is
     able to reproduce it, but THIS IS NOT an mbuma-specific
     problem and it currently occurs even WITHOUT mbuma.
   - Issues in network locking: there is at least one
     code path in the rip code where one or more locks
     are acquired and we end up in m_prepend() with
     M_WAITOK, which causes WITNESS to whine from within
     UMA.  Current temporary solution: force all UMA
     allocations to be M_NOWAIT from within UMA for now
     to avoid deadlocks unless WITNESS is defined and we
     can determine with certainty that we're not holding
     any locks when we're M_WAITOK.
   - I've seen at least one weird socketbuffer empty-but-
     mbuf-still-attached panic.  I don't believe this
     to be related to mbuma but please keep your eyes
     open, turn on debugging, and capture crash dumps.

This change removes more code than it adds.

A paper is available detailing the change and considering
various performance issues; it was presented at BSDCan2004:
http://www.unixdaemons.com/~bmilekic/netbuf_bmilekic.pdf
Please read the paper for Future Work and implementation
details, as well as credits.

Testing and Debugging:
    rwatson,
    brueffer,
    Ketrien I. Saihr-Kesenchedra,
    ...
Reviewed by: Lots of people (for different parts)
Committed by: Bosko Milekic 2004-05-31 21:46:06 +00:00
commit 099a0e588c (parent 251b48a1bb)
Notes: svn2git 2020-12-20 02:59:44 +00:00
       svn path=/head/; revision=129906
23 changed files with 1739 additions and 2375 deletions


@ -1075,6 +1075,7 @@ kern/kern_lock.c standard
kern/kern_lockf.c standard
kern/kern_mac.c standard
kern/kern_malloc.c standard
kern/kern_mbuf.c standard
kern/kern_mib.c standard
kern/kern_module.c standard
kern/kern_mutex.c standard
@ -1116,7 +1117,6 @@ kern/subr_hints.c standard
kern/subr_kobj.c standard
kern/subr_log.c standard
kern/subr_mbpool.c optional libmbpool
kern/subr_mbuf.c standard
kern/subr_mchain.c optional libmchain
kern/subr_module.c standard
kern/subr_msgbuf.c standard


@ -95,6 +95,10 @@ __FBSDID("$FreeBSD$");
#include <i386/isa/isa.h>
#endif
#ifndef NSFBUFS
#define NSFBUFS (512 + maxusers * 16)
#endif
static void cpu_reset_real(void);
#ifdef SMP
static void cpu_reset_proxy(void);
@ -584,6 +588,9 @@ sf_buf_init(void *arg)
vm_offset_t sf_base;
int i;
nsfbufs = NSFBUFS;
TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
TAILQ_INIT(&sf_buf_freelist);
sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE);


@ -191,6 +191,7 @@ malloc(size, type, flags)
int indx;
caddr_t va;
uma_zone_t zone;
uma_keg_t keg;
#ifdef DIAGNOSTIC
unsigned long osize = size;
#endif
@ -235,6 +236,7 @@ malloc(size, type, flags)
size = (size & ~KMEM_ZMASK) + KMEM_ZBASE;
indx = kmemsize[size >> KMEM_ZSHIFT];
zone = kmemzones[indx].kz_zone;
keg = zone->uz_keg;
#ifdef MALLOC_PROFILE
krequests[size >> KMEM_ZSHIFT]++;
#endif
@ -244,10 +246,11 @@ malloc(size, type, flags)
goto out;
ksp->ks_size |= 1 << indx;
size = zone->uz_size;
size = keg->uk_size;
} else {
size = roundup(size, PAGE_SIZE);
zone = NULL;
keg = NULL;
va = uma_large_malloc(size, flags);
mtx_lock(&ksp->ks_mtx);
if (va == NULL)
@ -309,7 +312,7 @@ free(addr, type)
#ifdef INVARIANTS
struct malloc_type **mtp = addr;
#endif
size = slab->us_zone->uz_size;
size = slab->us_keg->uk_size;
#ifdef INVARIANTS
/*
* Cache a pointer to the malloc_type that most recently freed
@ -325,7 +328,7 @@ free(addr, type)
sizeof(struct malloc_type *);
*mtp = type;
#endif
uma_zfree_arg(slab->us_zone, addr, slab);
uma_zfree_arg(LIST_FIRST(&slab->us_keg->uk_zones), addr, slab);
} else {
size = slab->us_size;
uma_large_free(slab);
@ -364,8 +367,8 @@ realloc(addr, size, type, flags)
("realloc: address %p out of range", (void *)addr));
/* Get the size of the original block */
if (slab->us_zone)
alloc = slab->us_zone->uz_size;
if (slab->us_keg)
alloc = slab->us_keg->uk_size;
else
alloc = slab->us_size;
@ -410,7 +413,6 @@ kmeminit(dummy)
void *dummy;
{
u_int8_t indx;
u_long npg;
u_long mem_size;
int i;
@ -428,7 +430,7 @@ kmeminit(dummy)
* Note that the kmem_map is also used by the zone allocator,
* so make sure that there is enough space.
*/
vm_kmem_size = VM_KMEM_SIZE;
vm_kmem_size = VM_KMEM_SIZE + nmbclusters * PAGE_SIZE;
mem_size = cnt.v_page_count;
#if defined(VM_KMEM_SIZE_SCALE)
@ -462,17 +464,8 @@ kmeminit(dummy)
*/
init_param3(vm_kmem_size / PAGE_SIZE);
/*
* In mbuf_init(), we set up submaps for mbufs and clusters, in which
* case we rounddown() (nmbufs * MSIZE) and (nmbclusters * MCLBYTES),
* respectively. Mathematically, this means that what we do here may
* amount to slightly more address space than we need for the submaps,
* but it never hurts to have an extra page in kmem_map.
*/
npg = (nmbufs*MSIZE + nmbclusters*MCLBYTES + vm_kmem_size) / PAGE_SIZE;
kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase,
(vm_offset_t *)&kmemlimit, (vm_size_t)(npg * PAGE_SIZE));
(vm_offset_t *)&kmemlimit, vm_kmem_size);
kmem_map->system_map = 1;
uma_startup2();

sys/kern/kern_mbuf.c (new file, 385 lines)

@ -0,0 +1,385 @@
/*-
* Copyright (c) 2004
* Bosko Milekic <bmilekic@FreeBSD.org>.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the author nor the names of contributors may be
* used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_mac.h"
#include "opt_param.h"
#include <sys/param.h>
#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/uma.h>
/*
* In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA
* Zones.
*
* Mbuf Clusters (2K, contiguous) are allocated from the Cluster
* Zone. The Zone can be capped at kern.ipc.nmbclusters, if the
* administrator so desires.
*
* Mbufs are allocated from a UMA Master Zone called the Mbuf
* Zone.
*
* Additionally, FreeBSD provides a Packet Zone, which it
* configures as a Secondary Zone to the Mbuf Master Zone,
* thus sharing backend Slab kegs with the Mbuf Master Zone.
*
* Thus common-case allocations and locking are simplified:
*
* m_clget() m_getcl()
* | |
* | .------------>[(Packet Cache)] m_get(), m_gethdr()
* | | [ Packet ] |
* [(Cluster Cache)] [ Secondary ] [ (Mbuf Cache) ]
* [ Cluster Zone ] [ Zone ] [ Mbuf Master Zone ]
* | \________ |
* [ Cluster Keg ] \ /
* | [ Mbuf Keg ]
* [ Cluster Slabs ] |
* | [ Mbuf Slabs ]
* \____________(VM)_________________/
*/
int nmbclusters;
struct mbstat mbstat;
static void
tunable_mbinit(void *dummy)
{
/* This has to be done before VM init. */
nmbclusters = 1024 + maxusers * 64;
TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
}
SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL);
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbclusters, CTLFLAG_RW, &nmbclusters, 0,
"Maximum number of mbuf clusters allowed");
SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
"Mbuf general information and statistics");
/*
* Zones from which we allocate.
*/
uma_zone_t zone_mbuf;
uma_zone_t zone_clust;
uma_zone_t zone_pack;
/*
* Local prototypes.
*/
static void mb_ctor_mbuf(void *, int, void *);
static void mb_ctor_clust(void *, int, void *);
static void mb_ctor_pack(void *, int, void *);
static void mb_dtor_mbuf(void *, int, void *);
static void mb_dtor_clust(void *, int, void *); /* XXX */
static void mb_dtor_pack(void *, int, void *); /* XXX */
static void mb_init_pack(void *, int);
static void mb_fini_pack(void *, int);
static void mb_reclaim(void *);
static void mbuf_init(void *);
/*
* Initialize FreeBSD Network buffer allocation.
*/
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL)
static void
mbuf_init(void *dummy)
{
/*
* Configure UMA zones for Mbufs, Clusters, and Packets.
*/
zone_mbuf = uma_zcreate("Mbuf", MSIZE, mb_ctor_mbuf, mb_dtor_mbuf,
NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_MAXBUCKET);
zone_clust = uma_zcreate("MbufClust", MCLBYTES, mb_ctor_clust,
mb_dtor_clust, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
if (nmbclusters > 0)
uma_zone_set_max(zone_clust, nmbclusters);
zone_pack = uma_zsecond_create("Packet", mb_ctor_pack, mb_dtor_pack,
mb_init_pack, mb_fini_pack, zone_mbuf);
/* uma_prealloc() goes here */
/*
* Hook event handler for low-memory situation, used to
* drain protocols and push data back to the caches (UMA
* later pushes it back to VM).
*/
EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
EVENTHANDLER_PRI_FIRST);
/*
* [Re]set counters and local statistics knobs.
* XXX Some of these should go and be replaced, but UMA stat
* gathering needs to be revised.
*/
mbstat.m_mbufs = 0;
mbstat.m_mclusts = 0;
mbstat.m_drain = 0;
mbstat.m_msize = MSIZE;
mbstat.m_mclbytes = MCLBYTES;
mbstat.m_minclsize = MINCLSIZE;
mbstat.m_mlen = MLEN;
mbstat.m_mhlen = MHLEN;
mbstat.m_numtypes = MT_NTYPES;
mbstat.m_mcfail = mbstat.m_mpfail = 0;
mbstat.sf_iocnt = 0;
mbstat.sf_allocwait = mbstat.sf_allocfail = 0;
}
/*
* Constructor for Mbuf master zone.
*
* The 'arg' pointer points to a mb_args structure which
* contains call-specific information required to support the
* mbuf allocation API.
*/
static void
mb_ctor_mbuf(void *mem, int size, void *arg)
{
struct mbuf *m;
struct mb_args *args;
int flags;
int how;
short type;
m = (struct mbuf *)mem;
args = (struct mb_args *)arg;
flags = args->flags;
how = args->how;
type = args->type;
m->m_type = type;
m->m_next = NULL;
m->m_nextpkt = NULL;
if (flags & M_PKTHDR) {
m->m_data = m->m_pktdat;
m->m_flags = M_PKTHDR;
m->m_pkthdr.rcvif = NULL;
m->m_pkthdr.csum_flags = 0;
SLIST_INIT(&m->m_pkthdr.tags);
#ifdef MAC
/* If the label init fails, fail the alloc */
if (mac_init_mbuf(m, how) != 0) {
m_free(m);
/* XXX*/ panic("mb_ctor_mbuf(): can't deal with failure!");
/* return 0; */
}
#endif
} else {
m->m_data = m->m_dat;
m->m_flags = 0;
}
mbstat.m_mbufs += 1; /* XXX */
/* return 1;
*/
}
/*
* The Mbuf master zone and Packet secondary zone destructor.
*/
static void
mb_dtor_mbuf(void *mem, int size, void *arg)
{
struct mbuf *m;
m = (struct mbuf *)mem;
if ((m->m_flags & M_PKTHDR) != 0)
m_tag_delete_chain(m, NULL);
mbstat.m_mbufs -= 1; /* XXX */
}
/* XXX Only because of stats */
static void
mb_dtor_pack(void *mem, int size, void *arg)
{
struct mbuf *m;
m = (struct mbuf *)mem;
if ((m->m_flags & M_PKTHDR) != 0)
m_tag_delete_chain(m, NULL);
mbstat.m_mbufs -= 1; /* XXX */
mbstat.m_mclusts -= 1; /* XXX */
}
/*
* The Cluster zone constructor.
*
* Here the 'arg' pointer points to the Mbuf which we
* are configuring cluster storage for.
*/
static void
mb_ctor_clust(void *mem, int size, void *arg)
{
struct mbuf *m;
m = (struct mbuf *)arg;
m->m_ext.ext_buf = (caddr_t)mem;
m->m_data = m->m_ext.ext_buf;
m->m_flags |= M_EXT;
m->m_ext.ext_free = NULL;
m->m_ext.ext_args = NULL;
m->m_ext.ext_size = MCLBYTES;
m->m_ext.ext_type = EXT_CLUSTER;
m->m_ext.ref_cnt = (u_int *)uma_find_refcnt(zone_clust,
m->m_ext.ext_buf);
*(m->m_ext.ref_cnt) = 1;
mbstat.m_mclusts += 1; /* XXX */
/* return 1;
*/
}
/* XXX */
static void
mb_dtor_clust(void *mem, int size, void *arg)
{
mbstat.m_mclusts -= 1; /* XXX */
}
/*
* The Packet secondary zone's init routine, executed on the
* object's transition from keg slab to zone cache.
*/
static void
mb_init_pack(void *mem, int size)
{
struct mbuf *m;
m = (struct mbuf *)mem;
m->m_ext.ext_buf = NULL;
uma_zalloc_arg(zone_clust, m, M_NOWAIT);
if (m->m_ext.ext_buf == NULL) /* XXX */
panic("mb_init_pack(): Can't deal with failure yet.");
mbstat.m_mclusts -= 1; /* XXX */
}
/*
* The Packet secondary zone's fini routine, executed on the
* object's transition from zone cache to keg slab.
*/
static void
mb_fini_pack(void *mem, int size)
{
struct mbuf *m;
m = (struct mbuf *)mem;
uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL);
m->m_ext.ext_buf = NULL;
mbstat.m_mclusts += 1; /* XXX */
}
/*
* The "packet" keg constructor.
*/
static void
mb_ctor_pack(void *mem, int size, void *arg)
{
struct mbuf *m;
struct mb_args *args;
int flags, how;
short type;
m = (struct mbuf *)mem;
args = (struct mb_args *)arg;
flags = args->flags;
type = args->type;
how = args->how;
m->m_type = type;
m->m_next = NULL;
m->m_data = m->m_ext.ext_buf;
m->m_flags = flags|M_EXT;
m->m_ext.ext_free = NULL;
m->m_ext.ext_args = NULL;
m->m_ext.ext_size = MCLBYTES;
m->m_ext.ext_type = EXT_PACKET;
*(m->m_ext.ref_cnt) = 1;
if (flags & M_PKTHDR) {
m->m_nextpkt = NULL;
m->m_pkthdr.rcvif = NULL;
m->m_pkthdr.csum_flags = 0;
SLIST_INIT(&m->m_pkthdr.tags);
#ifdef MAC
/* If the label init fails, fail the alloc */
if (mac_init_mbuf(m, how) != 0) {
m_free(m);
/* XXX*/ panic("mb_ctor_pack(): can't deal with failure!");
/* return 0; */
}
#endif
}
mbstat.m_mbufs += 1; /* XXX */
mbstat.m_mclusts += 1; /* XXX */
/* return 1;
*/
}
/*
* This is the protocol drain routine.
*
* No locks should be held when this is called. The drain routines have to
* presently acquire some locks which raises the possibility of lock order
* reversal.
*/
static void
mb_reclaim(void *junk)
{
struct domain *dp;
struct protosw *pr;
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL,
"mb_reclaim()");
mbstat.m_drain++;
for (dp = domains; dp != NULL; dp = dp->dom_next)
for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
if (pr->pr_drain != NULL)
(*pr->pr_drain)();
}

File diff suppressed because it is too large


@ -85,6 +85,161 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
&m_defragrandomfailures, 0, "");
#endif
/*
* Malloc-type for external ext_buf ref counts.
*/
MALLOC_DEFINE(M_MBUF, "mbextcnt", "mbuf external ref counts");
/*
* Allocate a given length worth of mbufs and/or clusters (whatever fits
* best) and return a pointer to the top of the allocated chain. If an
* existing mbuf chain is provided, then we will append the new chain
* to the existing one but still return the top of the newly allocated
* chain.
*/
struct mbuf *
m_getm(struct mbuf *m, int len, int how, short type)
{
struct mbuf *mb, *top, *cur, *mtail;
int num, rem;
int i;
KASSERT(len >= 0, ("m_getm(): len is < 0"));
/* If m != NULL, we will append to the end of that chain. */
if (m != NULL)
for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next);
else
mtail = NULL;
/*
* Calculate how many mbufs+clusters ("packets") we need and how much
* leftover there is after that and allocate the first mbuf+cluster
* if required.
*/
num = len / MCLBYTES;
rem = len % MCLBYTES;
top = cur = NULL;
if (num > 0) {
if ((top = cur = m_getcl(how, type, 0)) == NULL)
goto failed;
}
num--;
top->m_len = 0;
for (i = 0; i < num; i++) {
mb = m_getcl(how, type, 0);
if (mb == NULL)
goto failed;
mb->m_len = 0;
cur = (cur->m_next = mb);
}
if (rem > 0) {
mb = (rem > MINCLSIZE) ?
m_getcl(how, type, 0) : m_get(how, type);
if (mb == NULL)
goto failed;
mb->m_len = 0;
if (cur == NULL)
top = mb;
else
cur->m_next = mb;
}
if (mtail != NULL)
mtail->m_next = top;
return top;
failed:
if (top != NULL)
m_freem(top);
return NULL;
}
/*
* Free an entire chain of mbufs and associated external buffers, if
* applicable.
*/
void
m_freem(struct mbuf *mb)
{
while (mb != NULL)
mb = m_free(mb);
}
/*-
* Configure a provided mbuf to refer to the provided external storage
* buffer and setup a reference count for said buffer. If the setting
* up of the reference count fails, the M_EXT bit will not be set. If
* successful, the M_EXT bit is set in the mbuf's flags.
*
* Arguments:
* mb The existing mbuf to which to attach the provided buffer.
* buf The address of the provided external storage buffer.
* size The size of the provided buffer.
* freef A pointer to a routine that is responsible for freeing the
* provided external storage buffer.
* args A pointer to an argument structure (of any type) to be passed
* to the provided freef routine (may be NULL).
* flags Any other flags to be passed to the provided mbuf.
* type The type that the external storage buffer should be
* labeled with.
*
* Returns:
* Nothing.
*/
void
m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
void (*freef)(void *, void *), void *args, int flags, int type)
{
u_int *ref_cnt = NULL;
/* XXX Shouldn't be adding EXT_CLUSTER with this API */
if (type == EXT_CLUSTER)
ref_cnt = (u_int *)uma_find_refcnt(zone_clust,
mb->m_ext.ext_buf);
else if (type == EXT_EXTREF)
ref_cnt = mb->m_ext.ref_cnt;
mb->m_ext.ref_cnt = (ref_cnt == NULL) ?
malloc(sizeof(u_int), M_MBUF, M_NOWAIT) : (u_int *)ref_cnt;
if (mb->m_ext.ref_cnt != NULL) {
*(mb->m_ext.ref_cnt) = 1;
mb->m_flags |= (M_EXT | flags);
mb->m_ext.ext_buf = buf;
mb->m_data = mb->m_ext.ext_buf;
mb->m_ext.ext_size = size;
mb->m_ext.ext_free = freef;
mb->m_ext.ext_args = args;
mb->m_ext.ext_type = type;
}
}
/*
* Non-directly-exported function to clean up after mbufs with M_EXT
* storage attached to them if the reference count hits 0.
*/
void
mb_free_ext(struct mbuf *m)
{
MEXT_REM_REF(m);
if (atomic_cmpset_int(m->m_ext.ref_cnt, 0, 1)) {
if (m->m_ext.ext_type == EXT_PACKET) {
uma_zfree(zone_pack, m);
return;
} else if (m->m_ext.ext_type == EXT_CLUSTER) {
uma_zfree(zone_clust, m->m_ext.ext_buf);
m->m_ext.ext_buf = NULL;
} else {
(*(m->m_ext.ext_free))(m->m_ext.ext_buf,
m->m_ext.ext_args);
if (m->m_ext.ext_type != EXT_EXTREF)
free(m->m_ext.ref_cnt, M_MBUF);
}
}
uma_zfree(zone_mbuf, m);
}
/*
* "Move" mbuf pkthdr from "from" to "to".
* "from" must have M_PKTHDR set, and "to" must be empty.
@ -364,22 +519,22 @@ m_dup(struct mbuf *m, int how)
struct mbuf *n;
/* Get the next new mbuf */
MGET(n, how, m->m_type);
if (remain >= MINCLSIZE) {
n = m_getcl(how, m->m_type, 0);
nsize = MCLBYTES;
} else {
n = m_get(how, m->m_type);
nsize = MLEN;
}
if (n == NULL)
goto nospace;
if (top == NULL) { /* first one, must be PKTHDR */
if (!m_dup_pkthdr(n, m, how))
goto nospace;
nsize = MHLEN;
} else /* not the first one */
nsize = MLEN;
if (remain >= MINCLSIZE) {
MCLGET(n, how);
if ((n->m_flags & M_EXT) == 0) {
(void)m_free(n);
if (top == NULL) { /* First one, must be PKTHDR */
if (!m_dup_pkthdr(n, m, how)) {
m_free(n);
goto nospace;
}
nsize = MCLBYTES;
nsize = MHLEN;
}
n->m_len = 0;
@ -651,39 +806,42 @@ m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
void (*copy)(char *from, caddr_t to, u_int len))
{
struct mbuf *m;
struct mbuf *top = 0, **mp = &top;
struct mbuf *top = NULL, **mp = &top;
int len;
if (off < 0 || off > MHLEN)
return (NULL);
MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == NULL)
return (NULL);
m->m_pkthdr.rcvif = ifp;
m->m_pkthdr.len = totlen;
len = MHLEN;
while (totlen > 0) {
if (top) {
MGET(m, M_DONTWAIT, MT_DATA);
if (top == NULL) { /* First one, must be PKTHDR */
if (totlen + off >= MINCLSIZE) {
m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
len = MCLBYTES;
} else {
m = m_gethdr(M_DONTWAIT, MT_DATA);
len = MHLEN;
/* Place initial small packet/header at end of mbuf */
if (m && totlen + off + max_linkhdr <= MLEN) {
m->m_data += max_linkhdr;
len -= max_linkhdr;
}
}
if (m == NULL)
return NULL;
m->m_pkthdr.rcvif = ifp;
m->m_pkthdr.len = totlen;
} else {
if (totlen + off >= MINCLSIZE) {
m = m_getcl(M_DONTWAIT, MT_DATA, 0);
len = MCLBYTES;
} else {
m = m_get(M_DONTWAIT, MT_DATA);
len = MLEN;
}
if (m == NULL) {
m_freem(top);
return (NULL);
}
len = MLEN;
}
if (totlen + off >= MINCLSIZE) {
MCLGET(m, M_DONTWAIT);
if (m->m_flags & M_EXT)
len = MCLBYTES;
} else {
/*
* Place initial small packet/header at end of mbuf.
*/
if (top == NULL && totlen + off + max_linkhdr <= len) {
m->m_data += max_linkhdr;
len -= max_linkhdr;
return NULL;
}
}
if (off) {
@ -722,9 +880,10 @@ m_copyback(struct mbuf *m0, int off, int len, c_caddr_t cp)
off -= mlen;
totlen += mlen;
if (m->m_next == NULL) {
n = m_get_clrd(M_DONTWAIT, m->m_type);
n = m_get(M_DONTWAIT, m->m_type);
if (n == NULL)
goto out;
bzero(mtod(n, caddr_t), MLEN);
n->m_len = min(MLEN, len + off);
m->m_next = n;
}


@ -230,14 +230,10 @@ m_pulldown(struct mbuf *m, int off, int len, int *offp)
* now, we need to do the hard way. don't m_copy as there's no room
* on both end.
*/
MGET(o, M_DONTWAIT, m->m_type);
if (o && len > MLEN) {
MCLGET(o, M_DONTWAIT);
if ((o->m_flags & M_EXT) == 0) {
m_free(o);
o = NULL;
}
}
if (len > MLEN)
o = m_getcl(M_DONTWAIT, m->m_type, 0);
else
o = m_get(M_DONTWAIT, m->m_type);
if (!o) {
m_freem(m);
return NULL; /* ENOBUFS */
@ -274,29 +270,27 @@ static struct mbuf *
m_dup1(struct mbuf *m, int off, int len, int wait)
{
struct mbuf *n;
int l;
int copyhdr;
if (len > MCLBYTES)
return NULL;
if (off == 0 && (m->m_flags & M_PKTHDR) != 0) {
if (off == 0 && (m->m_flags & M_PKTHDR) != 0)
copyhdr = 1;
MGETHDR(n, wait, m->m_type);
l = MHLEN;
} else {
else
copyhdr = 0;
MGET(n, wait, m->m_type);
l = MLEN;
}
if (n && len > l) {
MCLGET(n, wait);
if ((n->m_flags & M_EXT) == 0) {
m_free(n);
n = NULL;
}
if (len >= MINCLSIZE) {
if (copyhdr == 1)
n = m_getcl(wait, m->m_type, M_PKTHDR);
else
n = m_getcl(wait, m->m_type, 0);
} else {
if (copyhdr == 1)
n = m_gethdr(wait, m->m_type);
else
n = m_get(wait, m->m_type);
}
if (!n)
return NULL;
return NULL; /* ENOBUFS */
if (copyhdr && !m_dup_pkthdr(n, m, wait)) {
m_free(n);


@ -959,15 +959,12 @@ sbcreatecontrol(p, size, type, level)
if (CMSG_SPACE((u_int)size) > MCLBYTES)
return ((struct mbuf *) NULL);
if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
if (CMSG_SPACE((u_int)size > MLEN))
m = m_getcl(M_DONTWAIT, MT_CONTROL, 0);
else
m = m_get(M_DONTWAIT, MT_CONTROL);
if (m == NULL)
return ((struct mbuf *) NULL);
if (CMSG_SPACE((u_int)size) > MLEN) {
MCLGET(m, M_DONTWAIT);
if ((m->m_flags & M_EXT) == 0) {
m_free(m);
return ((struct mbuf *) NULL);
}
}
cp = mtod(m, struct cmsghdr *);
m->m_len = 0;
KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m),


@ -527,8 +527,8 @@ sosend(so, addr, uio, top, control, flags, td)
{
struct mbuf **mp;
struct mbuf *m;
long space, len, resid;
int clen = 0, error, s, dontroute, mlen;
long space, len = 0, resid;
int clen = 0, error, s, dontroute;
int atomic = sosendallatonce(so) || top;
#ifdef ZERO_COPY_SOCKETS
int cow_send;
@ -624,25 +624,23 @@ sosend(so, addr, uio, top, control, flags, td)
#ifdef ZERO_COPY_SOCKETS
cow_send = 0;
#endif /* ZERO_COPY_SOCKETS */
if (top == 0) {
MGETHDR(m, M_TRYWAIT, MT_DATA);
if (m == NULL) {
error = ENOBUFS;
goto release;
}
mlen = MHLEN;
m->m_pkthdr.len = 0;
m->m_pkthdr.rcvif = (struct ifnet *)0;
} else {
MGET(m, M_TRYWAIT, MT_DATA);
if (m == NULL) {
error = ENOBUFS;
goto release;
}
mlen = MLEN;
}
if (resid >= MINCLSIZE) {
#ifdef ZERO_COPY_SOCKETS
if (top == NULL) {
MGETHDR(m, M_TRYWAIT, MT_DATA);
if (m == NULL) {
error = ENOBUFS;
goto release;
}
m->m_pkthdr.len = 0;
m->m_pkthdr.rcvif = (struct ifnet *)0;
} else {
MGET(m, M_TRYWAIT, MT_DATA);
if (m == NULL) {
error = ENOBUFS;
goto release;
}
}
if (so_zero_copy_send &&
resid>=PAGE_SIZE &&
space>=PAGE_SIZE &&
@ -654,29 +652,48 @@ sosend(so, addr, uio, top, control, flags, td)
cow_send = socow_setup(m, uio);
}
}
if (!cow_send){
#endif /* ZERO_COPY_SOCKETS */
MCLGET(m, M_TRYWAIT);
if ((m->m_flags & M_EXT) == 0)
goto nopages;
mlen = MCLBYTES;
len = min(min(mlen, resid), space);
} else {
#ifdef ZERO_COPY_SOCKETS
if (!cow_send) {
MCLGET(m, M_TRYWAIT);
if ((m->m_flags & M_EXT) == 0) {
m_free(m);
m = NULL;
} else {
len = min(min(MCLBYTES, resid), space);
}
} else
len = PAGE_SIZE;
}
} else {
#else /* ZERO_COPY_SOCKETS */
if (top == NULL) {
m = m_getcl(M_TRYWAIT, MT_DATA, M_PKTHDR);
m->m_pkthdr.len = 0;
m->m_pkthdr.rcvif = (struct ifnet *)0;
} else
m = m_getcl(M_TRYWAIT, MT_DATA, 0);
len = min(min(MCLBYTES, resid), space);
#endif /* ZERO_COPY_SOCKETS */
nopages:
len = min(min(mlen, resid), space);
/*
* For datagram protocols, leave room
* for protocol headers in first mbuf.
*/
if (atomic && top == 0 && len < mlen)
MH_ALIGN(m, len);
} else {
if (top == NULL) {
m = m_gethdr(M_TRYWAIT, MT_DATA);
m->m_pkthdr.len = 0;
m->m_pkthdr.rcvif = (struct ifnet *)0;
len = min(min(MHLEN, resid), space);
/*
* For datagram protocols, leave room
* for protocol headers in first mbuf.
*/
if (atomic && m && len < MHLEN)
MH_ALIGN(m, len);
} else {
m = m_get(M_TRYWAIT, MT_DATA);
len = min(min(MLEN, resid), space);
}
}
if (m == NULL) {
error = ENOBUFS;
goto release;
}
space -= len;
#ifdef ZERO_COPY_SOCKETS
if (cow_send)


@ -959,15 +959,12 @@ sbcreatecontrol(p, size, type, level)
if (CMSG_SPACE((u_int)size) > MCLBYTES)
return ((struct mbuf *) NULL);
if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
if (CMSG_SPACE((u_int)size > MLEN))
m = m_getcl(M_DONTWAIT, MT_CONTROL, 0);
else
m = m_get(M_DONTWAIT, MT_CONTROL);
if (m == NULL)
return ((struct mbuf *) NULL);
if (CMSG_SPACE((u_int)size) > MLEN) {
MCLGET(m, M_DONTWAIT);
if ((m->m_flags & M_EXT) == 0) {
m_free(m);
return ((struct mbuf *) NULL);
}
}
cp = mtod(m, struct cmsghdr *);
m->m_len = 0;
KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m),


@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$");
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/vnode.h>
#ifdef KTRACE
@ -84,6 +85,21 @@ static int getsockname1(struct thread *td, struct getsockname_args *uap,
static int getpeername1(struct thread *td, struct getpeername_args *uap,
int compat);
/*
* NSFBUFS-related variables and associated sysctls
*/
int nsfbufs;
int nsfbufspeak;
int nsfbufsused;
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
"Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
"Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
"Number of sendfile(2) sf_bufs in use");
/*
* System call interface to the socket abstraction.
*/


@ -86,6 +86,10 @@
#include <machine/tlb.h>
#include <machine/tstate.h>
#ifndef NSFBUFS
#define NSFBUFS (512 + maxusers * 16)
#endif
static void sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
@ -351,6 +355,9 @@ sf_buf_init(void *arg)
vm_offset_t sf_base;
int i;
nsfbufs = NSFBUFS;
TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
SLIST_INIT(&sf_freelist.sf_head);
sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE);


@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
@ -33,7 +33,12 @@
#ifndef _SYS_MBUF_H_
#define _SYS_MBUF_H_
/* XXX: These includes suck. Sorry! */
#include <sys/queue.h>
#ifdef _KERNEL
#include <sys/systm.h>
#include <vm/uma.h>
#endif
/*
* Mbufs are of a single size, MSIZE (sys/param.h), which
@ -57,6 +62,16 @@
*/
#define mtod(m, t) ((t)((m)->m_data))
#define dtom(x) ((struct mbuf *)((intptr_t)(x) & ~(MSIZE-1)))
/*
* Argument structure passed to UMA routines during mbuf and packet
* allocations.
*/
struct mb_args {
int flags; /* Flags for mbuf being allocated */
int how; /* How to allocate: M_WAITOK or M_DONTWAIT */
short type; /* Type of mbuf being allocated */
};
#endif /* _KERNEL */
/*
@ -167,6 +182,7 @@ struct mbuf {
*/
#define EXT_CLUSTER 1 /* mbuf cluster */
#define EXT_SFBUF 2 /* sendfile(2)'s sf_bufs */
#define EXT_PACKET 3 /* came out of Packet zone */
#define EXT_NET_DRV 100 /* custom ext_buf provided by net driver(s) */
#define EXT_MOD_TYPE 200 /* custom module's ext_buf type */
#define EXT_DISPOSABLE 300 /* can throw this buffer away w/page flipping */
@ -222,29 +238,13 @@ struct mbuf {
#define MT_OOBDATA 15 /* expedited data */
#define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */
/*
* Mbuf and cluster allocation statistics PCPU structure.
*/
struct mbpstat {
u_long mb_mbfree;
u_long mb_mbbucks;
u_long mb_clfree;
u_long mb_clbucks;
long mb_mbtypes[MT_NTYPES];
short mb_active;
};
/*
* General mbuf allocator statistics structure.
* XXX: Modifications of these are not protected by any mutex locks nor by
* any atomic() manipulations. As a result, we may occasionally lose
* a count or two. Luckily, not all of these fields are modified at all
* and remain static, and those that are manipulated are only manipulated
* in failure situations, which do not occur (hopefully) very often.
*/
struct mbstat {
u_long m_drops; /* times failed to allocate */
u_long m_wait; /* times succesfully returned from wait */
u_long m_mbufs; /* XXX */
u_long m_mclusts; /* XXX */
u_long m_drain; /* times drained protocols for space */
u_long m_mcfail; /* XXX: times m_copym failed */
u_long m_mpfail; /* XXX: times m_pullup failed */
@ -253,10 +253,10 @@ struct mbstat {
u_long m_minclsize; /* min length of data to allocate a cluster */
u_long m_mlen; /* length of data in an mbuf */
u_long m_mhlen; /* length of data in a header mbuf */
u_int m_mbperbuck; /* number of mbufs per "bucket" */
u_int m_clperbuck; /* number of clusters per "bucket" */
/* Number of mbtypes (gives # elems in mbpstat's mb_mbtypes[] array: */
/* Number of mbtypes (gives # elems in mbtypes[] array: */
short m_numtypes;
/* XXX: Sendfile stats should eventually move to their own struct */
u_long sf_iocnt; /* times sendfile had to do disk I/O */
u_long sf_allocfail; /* times sfbuf allocation failed */
@ -265,14 +265,23 @@ struct mbstat {
/*
* Flags specifying how an allocation should be made.
* M_DONTWAIT means "don't block if nothing is available" whereas
* M_TRYWAIT means "block for mbuf_wait ticks at most if nothing is
* available."
*
* The flag to use is as follows:
* - M_DONTWAIT or M_NOWAIT from an interrupt handler to not block allocation.
* - M_WAIT or M_WAITOK or M_TRYWAIT from wherever it is safe to block.
*
* M_DONTWAIT/M_NOWAIT means that we will not block the thread explicitly
* and if we cannot allocate immediately we may return NULL,
* whereas M_WAIT/M_WAITOK/M_TRYWAIT means that if we cannot allocate
* resources we will block until they are available, and thus never
* return NULL.
*
* XXX Eventually just phase this out to use M_WAITOK/M_NOWAIT.
*/
#define M_DONTWAIT 0x4 /* don't conflict with M_NOWAIT */
#define M_TRYWAIT 0x8 /* or M_WAITOK */
#define M_WAIT M_TRYWAIT /* XXX: deprecated */
#define MBTOM(how) ((how) & M_TRYWAIT ? M_WAITOK : M_NOWAIT)
#define MBTOM(how) (how)
#define M_DONTWAIT M_NOWAIT
#define M_TRYWAIT M_WAITOK
#define M_WAIT M_WAITOK
#ifdef _KERNEL
/*-
@ -295,36 +304,121 @@ struct mbstat {
#define MEXT_ADD_REF(m) atomic_add_int((m)->m_ext.ref_cnt, 1)
/*
* Network buffer allocation API
*
* The rest of it is defined in kern/subr_mbuf.c
*/
extern uma_zone_t zone_mbuf;
extern uma_zone_t zone_clust;
extern uma_zone_t zone_pack;
static __inline struct mbuf *m_get(int how, short type);
static __inline struct mbuf *m_gethdr(int how, short type);
static __inline struct mbuf *m_getcl(int how, short type, int flags);
static __inline struct mbuf *m_getclr(int how, short type); /* XXX */
static __inline struct mbuf *m_free(struct mbuf *m);
static __inline void m_clget(struct mbuf *m, int how);
static __inline void m_chtype(struct mbuf *m, short new_type);
void mb_free_ext(struct mbuf *);
static __inline
struct mbuf *
m_get(int how, short type)
{
struct mb_args args;
args.flags = 0;
args.how = how;
args.type = type;
return (uma_zalloc_arg(zone_mbuf, &args, how));
}
/* XXX This should be deprecated, very little use */
static __inline
struct mbuf *
m_getclr(int how, short type)
{
struct mbuf *m;
struct mb_args args;
args.flags = 0;
args.how = how;
args.type = type;
m = uma_zalloc_arg(zone_mbuf, &args, how);
if (m != NULL)
bzero(m->m_data, MLEN);
return m;
}
static __inline
struct mbuf *
m_gethdr(int how, short type)
{
struct mb_args args;
args.flags = M_PKTHDR;
args.how = how;
args.type = type;
return (uma_zalloc_arg(zone_mbuf, &args, how));
}
static __inline
struct mbuf *
m_getcl(int how, short type, int flags)
{
struct mb_args args;
args.flags = flags;
args.how = how;
args.type = type;
return (uma_zalloc_arg(zone_pack, &args, how));
}
static __inline
struct mbuf *
m_free(struct mbuf *m)
{
struct mbuf *n = m->m_next;
#ifdef INVARIANTS
m->m_flags |= M_FREELIST;
#endif
if (m->m_flags & M_EXT)
mb_free_ext(m);
else
uma_zfree(zone_mbuf, m);
return n;
}
static __inline
void
m_clget(struct mbuf *m, int how)
{
m->m_ext.ext_buf = NULL;
uma_zalloc_arg(zone_clust, m, how);
}
static __inline
void
m_chtype(struct mbuf *m, short new_type)
{
m->m_type = new_type;
}
/*
* mbuf, cluster, and external object allocation macros
* (for compatibility purposes).
*/
/* NB: M_COPY_PKTHDR is deprecated. Use M_MOVE_PKTHDR or m_dup_pkthdr. */
#define M_MOVE_PKTHDR(to, from) m_move_pkthdr((to), (from))
#define m_getclr(how, type) m_get_clrd((how), (type))
#define MGET(m, how, type) ((m) = m_get((how), (type)))
#define MGETHDR(m, how, type) ((m) = m_gethdr((how), (type)))
#define MCLGET(m, how) m_clget((m), (how))
#define MEXTADD(m, buf, size, free, args, flags, type) \
m_extadd((m), (caddr_t)(buf), (size), (free), (args), (flags), (type))
/*
* MEXTFREE(m): disassociate (and possibly free) an external object from (m).
*
* If the atomic_cmpset_int() returns 0, then we effectively do nothing
* in terms of "cleaning up" (freeing the ext buf and ref. counter) as
* this means that either there are still references, or another thread
* is taking care of the clean-up.
*/
#define MEXTFREE(m) do { \
struct mbuf *_mb = (m); \
\
MEXT_REM_REF(_mb); \
if (atomic_cmpset_int(_mb->m_ext.ref_cnt, 0, 1)) \
_mext_free(_mb); \
_mb->m_flags &= ~M_EXT; \
} while (0)
/*
* Evaluate TRUE if it's safe to write to the mbuf m's data region (this
* can be both the local data payload, or an external buffer area,
@ -425,18 +519,13 @@ extern int max_linkhdr; /* Largest link-level header */
extern int max_protohdr; /* Largest protocol header */
extern struct mbstat mbstat; /* General mbuf stats/infos */
extern int nmbclusters; /* Maximum number of clusters */
extern int nmbcnt; /* Scale kmem_map for counter space */
extern int nmbufs; /* Maximum number of mbufs */
struct uio;
void _mext_free(struct mbuf *);
void m_adj(struct mbuf *, int);
int m_apply(struct mbuf *, int, int,
int (*)(void *, void *, u_int), void *);
void m_cat(struct mbuf *, struct mbuf *);
void m_chtype(struct mbuf *, short);
void m_clget(struct mbuf *, int);
void m_extadd(struct mbuf *, caddr_t, u_int,
void (*)(void *, void *), void *, int, int);
void m_copyback(struct mbuf *, int, int, c_caddr_t);
@ -451,13 +540,7 @@ struct mbuf *m_dup(struct mbuf *, int);
int m_dup_pkthdr(struct mbuf *, struct mbuf *, int);
u_int m_fixhdr(struct mbuf *);
struct mbuf *m_fragment(struct mbuf *, int, int);
struct mbuf *m_free(struct mbuf *);
void m_freem(struct mbuf *);
struct mbuf *m_get(int, short);
struct mbuf *m_get_clrd(int, short);
struct mbuf *m_getcl(int, short, int);
struct mbuf *m_gethdr(int, short);
struct mbuf *m_gethdr_clrd(int, short);
struct mbuf *m_getm(struct mbuf *, int, int, short);
struct mbuf *m_getptr(struct mbuf *, int, int *);
u_int m_length(struct mbuf *, struct mbuf **);
@ -470,7 +553,7 @@ struct mbuf *m_split(struct mbuf *, int, int);
struct mbuf *m_uiotombuf(struct uio *, int, int);
/*-
* Packets may have annotations attached by affixing a list
* Network packets may have annotations attached by affixing a list
* of "packet tags" to the pkthdr structure. Packet tags are
* dynamically allocated semi-opaque data structures that have
* a fixed header (struct m_tag) that specifies the size of the


@ -43,7 +43,7 @@
/* Types and type defs */
struct uma_zone;
struct uma_zone;
/* Opaque type used as a handle to the zone */
typedef struct uma_zone * uma_zone_t;
@ -157,11 +157,45 @@ typedef void (*uma_fini)(void *mem, int size);
* A pointer to a structure which is intended to be opaque to users of
* the interface. The value may be null if the wait flag is not set.
*/
uma_zone_t uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
uma_init uminit, uma_fini fini, int align,
u_int16_t flags);
/*
* Create a secondary uma zone
*
* Arguments:
* name The text name of the zone for debugging and stats, this memory
* should not be freed until the zone has been deallocated.
* ctor The constructor that is called when the object is allocated
* dtor The destructor that is called when the object is freed.
* zinit An initializer that sets up the initial state of the memory
* as the object passes from the Keg's slab to the Zone's cache.
* zfini A discard function that undoes initialization done by init
* as the object passes from the Zone's cache to the Keg's slab.
*
* ctor/dtor/zinit/zfini may all be null, see notes above.
* Note that the zinit and zfini specified here are NOT
* exactly the same as the init/fini specified to uma_zcreate()
* when creating a master zone. These zinit/zfini are called
* on the TRANSITION from keg to zone (and vice-versa). Once
* these are set, the primary zone may alter its init/fini
* (which are called when the object passes from VM to keg)
* using uma_zone_set_init/fini()) as well as its own
* zinit/zfini (unset by default for master zone) with
* uma_zone_set_zinit/zfini() (note subtle 'z' prefix).
*
* align A bitmask that corresponds to the requested alignment
* eg 4 would be 0x3
* flags A set of parameters that control the behavior of the zone
*
* Returns:
* A pointer to a structure which is intended to be opaque to users of
* the interface. The value may be null if the wait flag is not set.
*/
uma_zone_t uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
uma_init zinit, uma_fini zfini, uma_zone_t master);
/*
* Definitions for uma_zcreate flags
*
@ -185,6 +219,9 @@ uma_zone_t uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
* Use a hash table instead of caching
* information in the vm_page.
*/
#define UMA_ZONE_SECONDARY 0x0200 /* Zone is a Secondary Zone */
#define UMA_ZONE_REFCNT 0x0400 /* Allocate refcnts in slabs */
#define UMA_ZONE_MAXBUCKET 0x0800 /* Use largest buckets */
/* Definitions for align */
#define UMA_ALIGN_PTR (sizeof(void *) - 1) /* Alignment fit for ptr */
@ -201,7 +238,6 @@ uma_zone_t uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
* zone The zone we want to destroy.
*
*/
void uma_zdestroy(uma_zone_t zone);
/*
@ -375,6 +411,28 @@ int uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int size);
*/
void uma_zone_set_max(uma_zone_t zone, int nitems);
/*
* The following two routines (uma_zone_set_init/fini)
* are used to set the backend init/fini pair which acts on an
* object as it becomes allocated and is placed in a slab within
* the specified zone's backing keg. These should probably not
* be changed once allocations have already begun and only
* immediately upon zone creation.
*/
void uma_zone_set_init(uma_zone_t zone, uma_init uminit);
void uma_zone_set_fini(uma_zone_t zone, uma_fini fini);
/*
* The following two routines (uma_zone_set_zinit/zfini) are
* used to set the zinit/zfini pair which acts on an object as
* it passes from the backing Keg's slab cache to the
* specified Zone's bucket cache. These should probably not
* be changed once allocations have already begun and
* only immediately upon zone creation.
*/
void uma_zone_set_zinit(uma_zone_t zone, uma_init zinit);
void uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini);
/*
* Replaces the standard page_alloc or obj_alloc functions for this zone
*
@ -430,5 +488,19 @@ void uma_zone_set_freef(uma_zone_t zone, uma_free freef);
*/
void uma_prealloc(uma_zone_t zone, int itemcnt);
/*
* Used to lookup the reference counter allocated for an item
* from a UMA_ZONE_REFCNT zone. For UMA_ZONE_REFCNT zones,
* reference counters are allocated for items and stored in
* the underlying slab header.
*
* Arguments:
* zone The UMA_ZONE_REFCNT zone to which the item belongs.
* item The address of the item for which we want a refcnt.
*
* Returns:
* A pointer to a u_int32_t reference counter.
*/
u_int32_t *uma_find_refcnt(uma_zone_t zone, void *item);
#endif

File diff suppressed because it is too large


@ -192,15 +192,17 @@ static uma_slab_t
uma_dbg_getslab(uma_zone_t zone, void *item)
{
uma_slab_t slab;
uma_keg_t keg;
u_int8_t *mem;
keg = zone->uz_keg;
mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
if (zone->uz_flags & UMA_ZONE_MALLOC) {
if (keg->uk_flags & UMA_ZONE_MALLOC) {
slab = vtoslab((vm_offset_t)mem);
} else if (zone->uz_flags & UMA_ZONE_HASH) {
slab = hash_sfind(&zone->uz_hash, mem);
} else if (keg->uk_flags & UMA_ZONE_HASH) {
slab = hash_sfind(&keg->uk_hash, mem);
} else {
mem += zone->uz_pgoff;
mem += keg->uk_pgoff;
slab = (uma_slab_t)mem;
}
@ -215,8 +217,10 @@ uma_dbg_getslab(uma_zone_t zone, void *item)
void
uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
{
uma_keg_t keg;
int freei;
keg = zone->uz_keg;
if (slab == NULL) {
slab = uma_dbg_getslab(zone, item);
if (slab == NULL)
@ -225,9 +229,9 @@ uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
}
freei = ((unsigned long)item - (unsigned long)slab->us_data)
/ zone->uz_rsize;
/ keg->uk_rsize;
slab->us_freelist[freei] = 255;
slab->us_freelist[freei].us_item = 255;
return;
}
@ -241,8 +245,10 @@ uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
void
uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
{
uma_keg_t keg;
int freei;
keg = zone->uz_keg;
if (slab == NULL) {
slab = uma_dbg_getslab(zone, item);
if (slab == NULL)
@ -251,22 +257,22 @@ uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
}
freei = ((unsigned long)item - (unsigned long)slab->us_data)
/ zone->uz_rsize;
/ keg->uk_rsize;
if (freei >= zone->uz_ipers)
if (freei >= keg->uk_ipers)
panic("zone: %s(%p) slab %p freelist %d out of range 0-%d\n",
zone->uz_name, zone, slab, freei, zone->uz_ipers-1);
zone->uz_name, zone, slab, freei, keg->uk_ipers-1);
if (((freei * zone->uz_rsize) + slab->us_data) != item) {
if (((freei * keg->uk_rsize) + slab->us_data) != item) {
printf("zone: %s(%p) slab %p freed address %p unaligned.\n",
zone->uz_name, zone, slab, item);
panic("should be %p\n",
(freei * zone->uz_rsize) + slab->us_data);
(freei * keg->uk_rsize) + slab->us_data);
}
if (slab->us_freelist[freei] != 255) {
if (slab->us_freelist[freei].us_item != 255) {
printf("Slab at %p, freei %d = %d.\n",
slab, freei, slab->us_freelist[freei]);
slab, freei, slab->us_freelist[freei].us_item);
panic("Duplicate free of item %p from zone %p(%s)\n",
item, zone, zone->uz_name);
}
@ -276,5 +282,5 @@ uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
* Until then the count of valid slabs will make sure we don't
* accidentally follow this and assume it's a valid index.
*/
slab->us_freelist[freei] = 0;
slab->us_freelist[freei].us_item = 0;
}


@ -35,10 +35,10 @@
/*
* Here's a quick description of the relationship between the objects:
*
* Zones contain lists of slabs which are stored in either the full bin, empty
* Kegs contain lists of slabs which are stored in either the full bin, empty
* bin, or partially allocated bin, to reduce fragmentation. They also contain
* the user supplied value for size, which is adjusted for alignment purposes
* and rsize is the result of that. The zone also stores information for
* and rsize is the result of that. The Keg also stores information for
* managing a hash of page addresses that maps pages to uma_slab_t structures
* for pages that don't have embedded uma_slab_t's.
*
@ -67,6 +67,20 @@
* so at this time it may not make sense to optimize for it. This can, of
* course, be solved with dynamic slab sizes.
*
* Kegs may serve multiple Zones but by far most of the time they only serve
* one. When a Zone is created, a Keg is allocated and setup for it. While
* the backing Keg stores slabs, the Zone caches Buckets of items allocated
* from the slabs. Each Zone is equipped with an init/fini and ctor/dtor
* pair, as well as with its own set of small per-CPU caches, layered above
* the Zone's general Bucket cache.
*
* The PCPU caches are protected by their own locks, while the Zones backed
* by the same Keg all share a common Keg lock (to coalesce contention on
* the backing slabs). The backing Keg typically only serves one Zone but
* in the case of multiple Zones, one of the Zones is considered the
* Master Zone and all Zone-related stats from the Keg are done in the
* Master Zone. For an example of a Multi-Zone setup, refer to the
* Mbuf allocation code.
*/
/*
@ -134,28 +148,6 @@
SLIST_REMOVE(&(h)->uh_slab_hash[UMA_HASH((h), \
(mem))], (s), uma_slab, us_hlink);
/* Page management structure */
/* Sorry for the union, but space efficiency is important */
struct uma_slab {
uma_zone_t us_zone; /* Zone we live in */
union {
LIST_ENTRY(uma_slab) _us_link; /* slabs in zone */
unsigned long _us_size; /* Size of allocation */
} us_type;
SLIST_ENTRY(uma_slab) us_hlink; /* Link for hash table */
u_int8_t *us_data; /* First item */
u_int8_t us_flags; /* Page flags see uma.h */
u_int8_t us_freecount; /* How many are free? */
u_int8_t us_firstfree; /* First free item index */
u_int8_t us_freelist[1]; /* Free List (actually larger) */
};
#define us_link us_type._us_link
#define us_size us_type._us_size
typedef struct uma_slab * uma_slab_t;
/* Hash table for freed address -> slab translation */
SLIST_HEAD(slabhead, uma_slab);
@ -187,6 +179,97 @@ struct uma_cache {
typedef struct uma_cache * uma_cache_t;
/*
* Keg management structure
*
* TODO: Optimize for cache line size
*
*/
struct uma_keg {
LIST_ENTRY(uma_keg) uk_link; /* List of all kegs */
struct mtx uk_lock; /* Lock for the keg */
struct uma_hash uk_hash;
LIST_HEAD(,uma_zone) uk_zones; /* Keg's zones */
LIST_HEAD(,uma_slab) uk_part_slab; /* partially allocated slabs */
LIST_HEAD(,uma_slab) uk_free_slab; /* empty slab list */
LIST_HEAD(,uma_slab) uk_full_slab; /* full slabs */
u_int32_t uk_recurse; /* Allocation recursion count */
u_int32_t uk_align; /* Alignment mask */
u_int32_t uk_pages; /* Total page count */
u_int32_t uk_free; /* Count of items free in slabs */
u_int32_t uk_size; /* Requested size of each item */
u_int32_t uk_rsize; /* Real size of each item */
u_int32_t uk_maxpages; /* Maximum number of pages to alloc */
uma_init uk_init; /* Keg's init routine */
uma_fini uk_fini; /* Keg's fini routine */
uma_alloc uk_allocf; /* Allocation function */
uma_free uk_freef; /* Free routine */
struct vm_object *uk_obj; /* Zone specific object */
vm_offset_t uk_kva; /* Base kva for zones with objs */
uma_zone_t uk_slabzone; /* Slab zone backing us, if OFFPAGE */
u_int16_t uk_pgoff; /* Offset to uma_slab struct */
u_int16_t uk_ppera; /* pages per allocation from backend */
u_int16_t uk_ipers; /* Items per slab */
u_int16_t uk_flags; /* Internal flags */
};
/* Simpler reference to uma_keg for internal use. */
typedef struct uma_keg * uma_keg_t;
/* Page management structure */
/* Sorry for the union, but space efficiency is important */
struct uma_slab_head {
uma_keg_t us_keg; /* Keg we live in */
union {
LIST_ENTRY(uma_slab) _us_link; /* slabs in zone */
unsigned long _us_size; /* Size of allocation */
} us_type;
SLIST_ENTRY(uma_slab) us_hlink; /* Link for hash table */
u_int8_t *us_data; /* First item */
u_int8_t us_flags; /* Page flags see uma.h */
u_int8_t us_freecount; /* How many are free? */
u_int8_t us_firstfree; /* First free item index */
};
/* The standard slab structure */
struct uma_slab {
struct uma_slab_head us_head; /* slab header data */
struct {
u_int8_t us_item;
} us_freelist[1]; /* actual number bigger */
};
/*
* The slab structure for UMA_ZONE_REFCNT zones for whose items we
* maintain reference counters in the slab for.
*/
struct uma_slab_refcnt {
struct uma_slab_head us_head; /* slab header data */
struct {
u_int8_t us_item;
u_int32_t us_refcnt;
} us_freelist[1]; /* actual number bigger */
};
#define us_keg us_head.us_keg
#define us_link us_head.us_type._us_link
#define us_size us_head.us_type._us_size
#define us_hlink us_head.us_hlink
#define us_data us_head.us_data
#define us_flags us_head.us_flags
#define us_freecount us_head.us_freecount
#define us_firstfree us_head.us_firstfree
typedef struct uma_slab * uma_slab_t;
typedef struct uma_slab_refcnt * uma_slabrefcnt_t;
/*
* Zone management structure
*
@ -195,42 +278,22 @@ typedef struct uma_cache * uma_cache_t;
*/
struct uma_zone {
char *uz_name; /* Text name of the zone */
LIST_ENTRY(uma_zone) uz_link; /* List of all zones */
u_int32_t uz_align; /* Alignment mask */
u_int32_t uz_pages; /* Total page count */
struct mtx *uz_lock; /* Lock for the zone (keg's lock) */
uma_keg_t uz_keg; /* Our underlying Keg */
/* Used during alloc / free */
struct mtx uz_lock; /* Lock for the zone */
u_int32_t uz_free; /* Count of items free in slabs */
u_int16_t uz_ipers; /* Items per slab */
u_int16_t uz_flags; /* Internal flags */
LIST_HEAD(,uma_slab) uz_part_slab; /* partially allocated slabs */
LIST_HEAD(,uma_slab) uz_free_slab; /* empty slab list */
LIST_HEAD(,uma_slab) uz_full_slab; /* full slabs */
LIST_ENTRY(uma_zone) uz_link; /* List of all zones in keg */
LIST_HEAD(,uma_bucket) uz_full_bucket; /* full buckets */
LIST_HEAD(,uma_bucket) uz_free_bucket; /* Buckets for frees */
u_int32_t uz_size; /* Requested size of each item */
u_int32_t uz_rsize; /* Real size of each item */
struct uma_hash uz_hash;
u_int16_t uz_pgoff; /* Offset to uma_slab struct */
u_int16_t uz_ppera; /* pages per allocation from backend */
uma_ctor uz_ctor; /* Constructor for each allocation */
uma_dtor uz_dtor; /* Destructor */
u_int64_t uz_allocs; /* Total number of allocations */
uma_init uz_init; /* Initializer for each item */
uma_fini uz_fini; /* Discards memory */
uma_alloc uz_allocf; /* Allocation function */
uma_free uz_freef; /* Free routine */
struct vm_object *uz_obj; /* Zone specific object */
vm_offset_t uz_kva; /* Base kva for zones with objs */
u_int32_t uz_maxpages; /* Maximum number of pages to alloc */
int uz_recurse; /* Allocation recursion count */
u_int64_t uz_allocs; /* Total number of allocations */
uint16_t uz_fills; /* Outstanding bucket fills */
uint16_t uz_count; /* Highest value ub_ptr can have */
/*
* This HAS to be the last item because we adjust the zone size
* based on NCPU and then allocate the space for the zones.
@ -256,16 +319,16 @@ void uma_large_free(uma_slab_t slab);
#define ZONE_LOCK_INIT(z, lc) \
do { \
if ((lc)) \
mtx_init(&(z)->uz_lock, (z)->uz_name, \
mtx_init((z)->uz_lock, (z)->uz_name, \
(z)->uz_name, MTX_DEF | MTX_DUPOK); \
else \
mtx_init(&(z)->uz_lock, (z)->uz_name, \
mtx_init((z)->uz_lock, (z)->uz_name, \
"UMA zone", MTX_DEF | MTX_DUPOK); \
} while (0)
#define ZONE_LOCK_FINI(z) mtx_destroy(&(z)->uz_lock)
#define ZONE_LOCK(z) mtx_lock(&(z)->uz_lock)
#define ZONE_UNLOCK(z) mtx_unlock(&(z)->uz_lock)
#define ZONE_LOCK_FINI(z) mtx_destroy((z)->uz_lock)
#define ZONE_LOCK(z) mtx_lock((z)->uz_lock)
#define ZONE_UNLOCK(z) mtx_unlock((z)->uz_lock)
#define CPU_LOCK_INIT(cpu) \
mtx_init(&uma_pcpu_mtx[(cpu)], "UMA pcpu", "UMA pcpu", \


@ -320,16 +320,6 @@ kmem_malloc(map, size, flags)
vm_map_lock(map);
if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
vm_map_unlock(map);
if (map != kmem_map) {
static int last_report; /* when we did it (in ticks) */
if (ticks < last_report ||
(ticks - last_report) >= hz) {
last_report = ticks;
printf("Out of mbuf address space!\n");
printf("Consider increasing NMBCLUSTERS\n");
}
return (0);
}
if ((flags & M_NOWAIT) == 0)
panic("kmem_malloc(%ld): kmem_map too small: %ld total allocated",
(long)size, (long)map->size);


@ -256,7 +256,6 @@ static char *nlistf = NULL, *memf = NULL;
int Aflag; /* show addresses of protocol control block */
int aflag; /* show all sockets (including servers) */
int bflag; /* show i/f total bytes in/out */
int cflag; /* show mbuf cache information */
int dflag; /* show i/f dropped packets */
int gflag; /* show group (multicast) routing or stats */
int iflag; /* show interfaces */
@ -297,9 +296,6 @@ main(int argc, char *argv[])
case 'b':
bflag = 1;
break;
case 'c':
cflag = 1;
break;
case 'd':
dflag = 1;
break;
@ -425,10 +421,6 @@ main(int argc, char *argv[])
if (nlistf != NULL || memf != NULL)
setgid(getgid());
if (cflag && !mflag) {
(void)fprintf(stderr, "-c only valid with -m\n");
usage();
}
if (mflag) {
if (memf != NULL) {
if (kread(0, 0, 0) == 0)


@ -99,17 +99,12 @@ mbpr(u_long mbaddr, u_long mbtaddr __unused, u_long nmbcaddr, u_long nmbufaddr,
u_long mbhiaddr, u_long clhiaddr, u_long mbloaddr, u_long clloaddr,
u_long cpusaddr __unused, u_long pgsaddr, u_long mbpaddr)
{
int i, j, nmbufs, nmbclusters, page_size, num_objs;
int i, nmbclusters;
int nsfbufs, nsfbufspeak, nsfbufsused;
u_int mbuf_hiwm, clust_hiwm, mbuf_lowm, clust_lowm;
u_long totspace[2], totused[2];
u_long gentotnum, gentotfree, totnum, totfree;
u_long totmem, totmemalloced, totmemused;
short nmbtypes;
size_t mlen;
long *mbtypes = NULL;
struct mbstat *mbstat = NULL;
struct mbpstat **mbpstat = NULL;
struct mbtypenames *mp;
bool *seen = NULL;
@ -119,50 +114,12 @@ mbpr(u_long mbaddr, u_long mbtaddr __unused, u_long nmbcaddr, u_long nmbufaddr,
goto err;
}
/*
* XXX: Unfortunately, for the time being, we have to fetch
* the total length of the per-CPU stats area via sysctl
* (regardless of whether we're looking at a core or not.
*/
if (sysctlbyname("kern.ipc.mb_statpcpu", NULL, &mlen, NULL, 0) < 0) {
warn("sysctl: retrieving mb_statpcpu len");
goto err;
}
num_objs = (int)(mlen / sizeof(struct mbpstat));
if ((mbpstat = calloc(num_objs, sizeof(struct mbpstat *))) == NULL) {
warn("calloc: cannot allocate memory for mbpstats pointers");
goto err;
}
if ((mbpstat[0] = calloc(num_objs, sizeof(struct mbpstat))) == NULL) {
warn("calloc: cannot allocate memory for mbpstats");
goto err;
}
if (mbaddr) {
if (kread(mbpaddr, (char *)mbpstat[0], mlen))
goto err;
if (kread(mbaddr, (char *)mbstat, sizeof mbstat))
goto err;
if (kread(nmbcaddr, (char *)&nmbclusters, sizeof(int)))
goto err;
if (kread(nmbufaddr, (char *)&nmbufs, sizeof(int)))
goto err;
if (kread(mbhiaddr, (char *)&mbuf_hiwm, sizeof(u_int)))
goto err;
if (kread(clhiaddr, (char *)&clust_hiwm, sizeof(u_int)))
goto err;
if (kread(mbloaddr, (char *)&mbuf_lowm, sizeof(u_int)))
goto err;
if (kread(clloaddr, (char *)&clust_lowm, sizeof(u_int)))
goto err;
if (kread(pgsaddr, (char *)&page_size, sizeof(int)))
goto err;
} else {
if (sysctlbyname("kern.ipc.mb_statpcpu", mbpstat[0], &mlen,
NULL, 0) < 0) {
warn("sysctl: retrieving mb_statpcpu");
goto err;
}
mlen = sizeof *mbstat;
if (sysctlbyname("kern.ipc.mbstat", mbstat, &mlen, NULL, 0)
< 0) {
@@ -175,43 +132,9 @@ mbpr(u_long mbaddr, u_long mbtaddr __unused, u_long nmbcaddr, u_long nmbufaddr,
warn("sysctl: retrieving nmbclusters");
goto err;
}
mlen = sizeof(int);
if (sysctlbyname("kern.ipc.nmbufs", &nmbufs, &mlen, NULL, 0)
< 0) {
warn("sysctl: retrieving nmbufs");
goto err;
}
mlen = sizeof(u_int);
if (sysctlbyname("kern.ipc.mbuf_hiwm", &mbuf_hiwm, &mlen,
NULL, 0) < 0) {
warn("sysctl: retrieving mbuf_hiwm");
goto err;
}
mlen = sizeof(u_int);
if (sysctlbyname("kern.ipc.clust_hiwm", &clust_hiwm, &mlen,
NULL, 0) < 0) {
warn("sysctl: retrieving clust_hiwm");
goto err;
}
mlen = sizeof(u_int);
if (sysctlbyname("kern.ipc.mbuf_lowm", &mbuf_lowm, &mlen,
NULL, 0) < 0) {
warn("sysctl: retrieving mbuf_lowm");
goto err;
}
mlen = sizeof(u_int);
if (sysctlbyname("kern.ipc.clust_lowm", &clust_lowm, &mlen,
NULL, 0) < 0) {
warn("sysctl: retrieving clust_lowm");
goto err;
}
mlen = sizeof(int);
if (sysctlbyname("hw.pagesize", &page_size, &mlen, NULL, 0)
< 0) {
warn("sysctl: retrieving hw.pagesize");
goto err;
}
}
if (mbstat->m_mbufs < 0) mbstat->m_mbufs = 0; /* XXX */
if (mbstat->m_mclusts < 0) mbstat->m_mclusts = 0; /* XXX */
nmbtypes = mbstat->m_numtypes;
if ((seen = calloc(nmbtypes, sizeof(*seen))) == NULL) {
@@ -223,59 +146,13 @@ mbpr(u_long mbaddr, u_long mbtaddr __unused, u_long nmbcaddr, u_long nmbufaddr,
goto err;
}
for (i = 0; i < num_objs; i++)
mbpstat[i] = mbpstat[0] + i;
#undef MSIZE
#define MSIZE (mbstat->m_msize)
#undef MCLBYTES
#define MCLBYTES (mbstat->m_mclbytes)
#define GENLST (num_objs - 1)
totnum = mbpstat[GENLST]->mb_mbbucks * mbstat->m_mbperbuck;
totfree = mbpstat[GENLST]->mb_mbfree;
for (j = 1; j < nmbtypes; j++)
mbtypes[j] += mbpstat[GENLST]->mb_mbtypes[j];
totspace[0] = mbpstat[GENLST]->mb_mbbucks * mbstat->m_mbperbuck * MSIZE;
for (i = 0; i < (num_objs - 1); i++) {
if (mbpstat[i]->mb_active == 0)
continue;
totspace[0] += mbpstat[i]->mb_mbbucks*mbstat->m_mbperbuck*MSIZE;
totnum += mbpstat[i]->mb_mbbucks * mbstat->m_mbperbuck;
totfree += mbpstat[i]->mb_mbfree;
for (j = 1; j < nmbtypes; j++)
mbtypes[j] += mbpstat[i]->mb_mbtypes[j];
}
totused[0] = totnum - totfree;
if (cflag) {
printf("mbuf usage:\n"
"\tTotal:\t\t%lu/%lu/%d (in use/in pool/max)\n",
totused[0], totnum, nmbufs);
gentotnum = mbpstat[GENLST]->mb_mbbucks * mbstat->m_mbperbuck;
gentotfree = mbpstat[GENLST]->mb_mbfree;
printf("\tGEN cache:\t%lu/%lu (in use/in pool)\n",
gentotnum - gentotfree, gentotnum);
} else {
/* XXX: peak is now wrong. */
printf("%lu/%lu/%d mbufs in use (current/peak/max):\n",
totused[0], totnum, nmbufs);
}
printf("%lu mbufs in use\n", mbstat->m_mbufs);
for (i = 0; cflag && i < (num_objs - 1); i++) {
if (mbpstat[i]->mb_active == 0)
continue;
printf("\tCPU #%d cache:\t%lu/%lu (in use/in pool)\n",
i,
(mbpstat[i]->mb_mbbucks * mbstat->m_mbperbuck -
mbpstat[i]->mb_mbfree),
(mbpstat[i]->mb_mbbucks * mbstat->m_mbperbuck));
}
if (cflag) {
printf("\tMbuf cache high watermark: %d\n", mbuf_hiwm);
#ifdef NOTYET
printf("\tMbuf cache low watermark: %d\n", mbuf_lowm);
#endif
}
for (mp = mbtypenames; mp->mt_name; mp++) {
if (mbtypes[mp->mt_type]) {
seen[mp->mt_type] = YES;
@@ -288,53 +165,10 @@ mbpr(u_long mbaddr, u_long mbtaddr __unused, u_long nmbcaddr, u_long nmbufaddr,
printf("\t %lu mbufs allocated to <mbuf type: %d>\n",
mbtypes[i], i);
}
if (cflag)
printf("\t%.1f%% of mbuf map consumed\n",
totspace[0] * 100.0 / (nmbufs * MSIZE));
totnum = mbpstat[GENLST]->mb_clbucks * mbstat->m_clperbuck;
totfree = mbpstat[GENLST]->mb_clfree;
totspace[1] = mbpstat[GENLST]->mb_clbucks*mbstat->m_clperbuck*MCLBYTES;
for (i = 0; i < (num_objs - 1); i++) {
if (mbpstat[i]->mb_active == 0)
continue;
totspace[1] += mbpstat[i]->mb_clbucks * mbstat->m_clperbuck
* MCLBYTES;
totnum += mbpstat[i]->mb_clbucks * mbstat->m_clperbuck;
totfree += mbpstat[i]->mb_clfree;
}
totused[1] = totnum - totfree;
if (cflag) {
printf("mbuf cluster usage:\n"
"\tTotal:\t\t%lu/%lu/%d (in use/in pool/max)\n",
totused[1], totnum, nmbclusters);
gentotnum = mbpstat[GENLST]->mb_clbucks * mbstat->m_clperbuck;
gentotfree = mbpstat[GENLST]->mb_clfree;
printf("\tGEN cache:\t%lu/%lu (in use/in pool)\n",
gentotnum - gentotfree, gentotnum);
} else {
/* XXX: peak is now wrong. */
printf("%lu/%lu/%d mbuf clusters in use (current/peak/max)\n",
totused[1], totnum, nmbclusters);
}
for (i = 0; cflag && i < (num_objs - 1); i++) {
if (mbpstat[i]->mb_active == 0)
continue;
printf("\tCPU #%d cache:\t%lu/%lu (in use/in pool)\n",
i,
(mbpstat[i]->mb_clbucks * mbstat->m_clperbuck -
mbpstat[i]->mb_clfree),
(mbpstat[i]->mb_clbucks * mbstat->m_clperbuck));
}
if (cflag) {
printf("\tCluster cache high watermark: %d\n", clust_hiwm);
#ifdef NOTYET
printf("\tCluster cache low watermark: %d\n", clust_lowm);
#endif
}
if (cflag)
printf("\t%.1f%% of cluster map consumed\n",
totspace[1] * 100.0 / (nmbclusters * MCLBYTES));
printf("%lu/%d mbuf clusters in use (current/max)\n",
mbstat->m_mclusts, nmbclusters);
mlen = sizeof(nsfbufs);
if (!sysctlbyname("kern.ipc.nsfbufs", &nsfbufs, &mlen, NULL, 0) &&
!sysctlbyname("kern.ipc.nsfbufsused", &nsfbufsused, &mlen, NULL,
@@ -344,15 +178,8 @@ mbpr(u_long mbaddr, u_long mbtaddr __unused, u_long nmbcaddr, u_long nmbufaddr,
printf("%d/%d/%d sfbufs in use (current/peak/max)\n",
nsfbufsused, nsfbufspeak, nsfbufs);
}
totmem = nmbufs * MSIZE + nmbclusters * MCLBYTES;
totmemalloced = totspace[0] + totspace[1];
totmemused = totused[0] * MSIZE + totused[1] * MCLBYTES;
printf(
"%lu KBytes allocated to network (%.1f%% in use, %.1f%% wired)\n",
totmem / 1024, totmemused * 100.0 / totmem,
totmemalloced * 100.0 / totmem);
printf("%lu requests for memory denied\n", mbstat->m_drops);
printf("%lu requests for memory delayed\n", mbstat->m_wait);
printf("%lu KBytes allocated to network\n", (mbstat->m_mbufs * MSIZE +
mbstat->m_mclusts * MCLBYTES) / 1024);
printf("%lu requests for sfbufs denied\n", mbstat->sf_allocfail);
printf("%lu requests for sfbufs delayed\n", mbstat->sf_allocwait);
printf("%lu requests for I/O initiated by sendfile\n",
@@ -366,9 +193,4 @@ mbpr(u_long mbaddr, u_long mbtaddr __unused, u_long nmbcaddr, u_long nmbufaddr,
free(seen);
if (mbstat != NULL)
free(mbstat);
if (mbpstat != NULL) {
if (mbpstat[0] != NULL)
free(mbpstat[0]);
free(mbpstat);
}
}

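[Illustration, not part of this commit: with the per-CPU mbpstat walk gone,
the statistics printed above come straight from the kern.ipc.mbstat and
kern.ipc.nmbclusters sysctls already used in the diff.  A minimal standalone
sketch of that fetch, assuming only the struct mbstat fields the diff itself
touches (m_mbufs, m_mclusts):]

#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>

#include <err.h>
#include <stdio.h>

int
main(void)
{
	struct mbstat mbstat;
	size_t mlen;
	int nmbclusters;

	/* Fetch the consolidated mbuf statistics structure. */
	mlen = sizeof(mbstat);
	if (sysctlbyname("kern.ipc.mbstat", &mbstat, &mlen, NULL, 0) < 0)
		err(1, "sysctl: retrieving mbstat");

	/* Fetch the cluster limit (boot-time tunable, 0 == unlimited). */
	mlen = sizeof(nmbclusters);
	if (sysctlbyname("kern.ipc.nmbclusters", &nmbclusters, &mlen,
	    NULL, 0) < 0)
		err(1, "sysctl: retrieving nmbclusters");

	printf("%lu mbufs in use\n", mbstat.m_mbufs);
	printf("%lu/%d mbuf clusters in use (current/max)\n",
	    mbstat.m_mclusts, nmbclusters);
	return (0);
}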

@@ -181,7 +181,6 @@ or for a single
.Bk -words
.Nm
.Fl m
.Op Fl c
.Op Fl M Ar core
.Op Fl N Ar system
.Ek
@@ -189,9 +188,6 @@ or for a single
Show statistics recorded by the memory management routines
.Pq Xr mbuf 9 .
The network manages a private pool of memory buffers.
The
.Fl c
option shows per-CPU statistics for caching.
.It Xo
.Bk -words
.Nm


@@ -39,7 +39,6 @@
extern int Aflag; /* show addresses of protocol control block */
extern int aflag; /* show all sockets (including servers) */
extern int bflag; /* show i/f total bytes in/out */
extern int cflag; /* show mbuf cache information */
extern int dflag; /* show i/f dropped packets */
extern int gflag; /* show group (multicast) routing or stats */
extern int iflag; /* show interfaces */


@@ -52,12 +52,9 @@ static const char sccsid[] = "@(#)mbufs.c 8.1 (Berkeley) 6/6/93";
#include "systat.h"
#include "extern.h"
static struct mbpstat **mbpstat;
static struct mbstat *mbstat;
static int num_objs;
static long *m_mbtypes;
static short nmbtypes;
#define GENLST (num_objs - 1)
static struct mtnames {
short mt_type;
@@ -101,20 +98,11 @@ void
showmbufs()
{
int i, j, max, idx;
u_long totfree;
u_long totmbufs;
char buf[10];
const char *mtname;
totfree = mbpstat[GENLST]->mb_mbfree;
for (i = 1; i < nmbtypes; i++)
m_mbtypes[i] += mbpstat[GENLST]->mb_mbtypes[i];
for (i = 0; i < GENLST; i++) {
if (mbpstat[i]->mb_active == 0)
continue;
totfree += mbpstat[i]->mb_mbfree;
for (j = 1; j < nmbtypes; j++)
m_mbtypes[j] += mbpstat[i]->mb_mbtypes[j];
}
totmbufs = mbstat->m_mbufs;
/*
* Print totals for different mbuf types.
@@ -159,16 +147,16 @@ showmbufs()
/*
* Print total number of free mbufs.
*/
if (totfree > 0) {
mvwprintw(wnd, 1+j, 0, "%-10.10s", "free");
if (totfree > 60) {
snprintf(buf, sizeof(buf), " %lu", totfree);
totfree = 60;
while(totfree--)
if (totmbufs > 0) {
mvwprintw(wnd, 1+j, 0, "%-10.10s", "Mbufs");
if (totmbufs > 60) {
snprintf(buf, sizeof(buf), " %lu", totmbufs);
totmbufs = 60;
while(totmbufs--)
waddch(wnd, 'X');
waddstr(wnd, buf);
} else {
while(totfree--)
while(totmbufs--)
waddch(wnd, 'X');
}
wclrtoeol(wnd);
@@ -198,23 +186,6 @@ initmbufs()
return 0;
}
if (sysctlbyname("kern.ipc.mb_statpcpu", NULL, &len, NULL, 0) < 0) {
error("sysctl getting mbpstat total size failed");
return 0;
}
num_objs = (int)(len / sizeof(struct mbpstat));
if ((mbpstat = calloc(num_objs, sizeof(struct mbpstat *))) == NULL) {
error("calloc mbpstat pointers failed");
return 0;
}
if ((mbpstat[0] = calloc(num_objs, sizeof(struct mbpstat))) == NULL) {
error("calloc mbpstat structures failed");
return 0;
}
for (i = 0; i < num_objs; i++)
mbpstat[i] = mbpstat[0] + i;
return 1;
}
@@ -223,7 +194,7 @@ fetchmbufs()
{
size_t len;
len = num_objs * sizeof(struct mbpstat);
if (sysctlbyname("kern.ipc.mb_statpcpu", mbpstat[0], &len, NULL, 0) < 0)
printw("sysctl: mbpstat: %s", strerror(errno));
len = sizeof *mbstat;
if (sysctlbyname("kern.ipc.mbstat", mbstat, &len, NULL, 0) < 0)
printw("sysctl: mbstat: %s", strerror(errno));
}