Introduce numerous SMP friendly changes to the mbuf allocator. Namely,

introduce a modified allocation mechanism for mbufs and mbuf clusters; one
which can scale under SMP and which offers the possibility of resource
reclamation to be implemented in the future. Notable advantages:

 o Reduce contention for SMP by offering per-CPU pools and locks.
 o Better use of data cache due to per-CPU pools.
 o Much less code cache pollution, which was caused by the excessively large
   allocation macros.
 o Framework for `grouping' objects from same page together so as to be able
   to possibly free wired-down pages back to the system if they are no longer
   needed by the network stacks.

 Additional things changed with this addition:

  - Moved some mbuf specific declarations and initializations from
    sys/conf/param.c into mbuf-specific code where they belong.
  - m_getclr() has been renamed to m_get_clrd() because the old name is really
    confusing. m_getclr() HAS been preserved though and is defined to the new
    name. No tree sweep has been done "to change the interface," as the old
    name will continue to be supported and is not deprecated. The change was
    merely done because m_getclr() sounds too much like "m_get a cluster."
  - TEMPORARILY disabled mbtypes statistics displaying in netstat(1) and
    systat(1) (see TODO below).
  - Fixed systat(1) to display number of "free mbufs" based on new per-CPU
    stat structures.
  - Fixed netstat(1) to display new per-CPU stats based on sysctl-exported
    per-CPU stat structures. All information is fetched via sysctl.

 TODO (in order of priority):

  - Re-enable mbtypes statistics in both netstat(1) and systat(1) after
    introducing an SMP friendly way to collect the mbtypes stats under the
    already introduced per-CPU locks (i.e. hopefully don't use atomic() - it
    seems too costly for a mere stat update, especially when other locks are
    already present).
  - Optionally have systat(1) display not only "total free mbufs" but also
    "total free mbufs per CPU pool."
  - Fix minor length-fetching issues in netstat(1) related to recently
    re-enabled option to read mbuf stats from a core file.
  - Move reference counters at least for mbuf clusters into an unused portion
    of the cluster itself, to save space and avoid the need to allocate a
    separate counter.
  - Look into introducing resource freeing possibly from a kproc.

Reviewed by (in parts): jlemon, jake, silby, terry
Tested by: jlemon (Intel & Alpha), mjacob (Intel & Alpha)
Preliminary performance measurements: jlemon (and me, obviously)
URL: http://people.freebsd.org/~bmilekic/mb_alloc/
This commit is contained in:
Bosko Milekic 2001-06-22 06:35:32 +00:00
parent a080f2d84f
commit 08442f8a82
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=78592
16 changed files with 1435 additions and 989 deletions

View File

@ -796,6 +796,7 @@ kern/subr_diskslice.c standard
kern/subr_eventhandler.c standard
kern/subr_kobj.c standard
kern/subr_log.c standard
kern/subr_mbuf.c standard
kern/subr_mchain.c optional libmchain
kern/subr_module.c standard
kern/subr_pcpu.c standard

View File

@ -64,17 +64,10 @@ int tickadj = howmany(30000, 60 * HZ); /* can adjust 30ms in 60s */
#define MAXFILES (NPROC*2)
#endif
int maxproc = NPROC; /* maximum # of processes */
int maxprocperuid = NPROC-1; /* maximum # of processes per user */
int maxfiles = MAXFILES; /* system wide open files limit */
int maxfilesperproc = MAXFILES; /* per-process open files limit */
int maxprocperuid = NPROC-1; /* max # of procs per user */
int maxfiles = MAXFILES; /* sys. wide open files limit */
int maxfilesperproc = MAXFILES; /* per-proc open files limit */
int ncallout = 16 + NPROC + MAXFILES; /* maximum # of timer events */
int mbuf_wait = 32; /* mbuf sleep time in ticks */
/* maximum # of sf_bufs (sendfile(2) zero-copy virtual buffers) */
#ifndef NSFBUFS
#define NSFBUFS (512 + MAXUSERS * 16)
#endif
int nsfbufs = NSFBUFS;
/*
* These may be set to nonzero here or by patching.

View File

@ -474,8 +474,15 @@ kmeminit(dummy)
if ((vm_kmem_size / 2) > (cnt.v_page_count * PAGE_SIZE))
vm_kmem_size = 2 * cnt.v_page_count * PAGE_SIZE;
/*
* In mb_init(), we set up submaps for mbufs and clusters, in which
* case we rounddown() (nmbufs * MSIZE) and (nmbclusters * MCLBYTES),
* respectively. Mathematically, this means that what we do here may
* amount to slightly more address space than we need for the submaps,
* but it never hurts to have an extra page in kmem_map.
*/
npg = (nmbufs * MSIZE + nmbclusters * MCLBYTES + nmbcnt *
sizeof(union mext_refcnt) + vm_kmem_size) / PAGE_SIZE;
sizeof(u_int) + vm_kmem_size) / PAGE_SIZE;
kmemusage = (struct kmemusage *) kmem_alloc(kernel_map,
(vm_size_t)(npg * sizeof(struct kmemusage)));

1029
sys/kern/subr_mbuf.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -64,17 +64,10 @@ int tickadj = howmany(30000, 60 * HZ); /* can adjust 30ms in 60s */
#define MAXFILES (NPROC*2)
#endif
int maxproc = NPROC; /* maximum # of processes */
int maxprocperuid = NPROC-1; /* maximum # of processes per user */
int maxfiles = MAXFILES; /* system wide open files limit */
int maxfilesperproc = MAXFILES; /* per-process open files limit */
int maxprocperuid = NPROC-1; /* max # of procs per user */
int maxfiles = MAXFILES; /* sys. wide open files limit */
int maxfilesperproc = MAXFILES; /* per-proc open files limit */
int ncallout = 16 + NPROC + MAXFILES; /* maximum # of timer events */
int mbuf_wait = 32; /* mbuf sleep time in ticks */
/* maximum # of sf_bufs (sendfile(2) zero-copy virtual buffers) */
#ifndef NSFBUFS
#define NSFBUFS (512 + MAXUSERS * 16)
#endif
int nsfbufs = NSFBUFS;
/*
* These may be set to nonzero here or by patching.

View File

@ -37,48 +37,18 @@
#include "opt_param.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/condvar.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#ifndef NMBCLUSTERS
#define NMBCLUSTERS (512 + MAXUSERS * 16)
#endif
static void mbinit(void *);
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)
struct mbuf *mbutl;
struct mbstat mbstat;
u_long mbtypes[MT_NTYPES];
int max_linkhdr;
int max_protohdr;
int max_hdr;
int max_datalen;
int nmbclusters = NMBCLUSTERS;
int nmbufs = NMBCLUSTERS * 4;
int nmbcnt;
u_long m_mballoc_wid = 0;
u_long m_clalloc_wid = 0;
/*
* freelist header structures...
* mbffree_lst, mclfree_lst, mcntfree_lst
*/
struct mbffree_lst mmbfree;
struct mclfree_lst mclfree;
struct mcntfree_lst mcntfree;
struct mtx mbuf_mtx;
/*
* sysctl(8) exported objects
@ -91,419 +61,6 @@ SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
&max_datalen, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
&mbuf_wait, 0, "");
SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RD, &mbstat, mbstat, "");
SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes,
sizeof(mbtypes), "LU", "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
&nmbclusters, 0, "Maximum number of mbuf clusters available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
"Maximum number of mbufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0,
"Maximum number of ext_buf counters available");
TUNABLE_INT("kern.ipc.nmbclusters", &nmbclusters);
TUNABLE_INT("kern.ipc.nmbufs", &nmbufs);
TUNABLE_INT("kern.ipc.nmbcnt", &nmbcnt);
static void m_reclaim(void);
/* Initial allocation numbers */
#define NCL_INIT 2
#define NMB_INIT 16
#define REF_INIT NMBCLUSTERS
static void
tunable_mbinit(void *dummy)
{
/*
* Sanity checks and pre-initialization for non-constants.
* This has to be done before VM initialization.
*/
if (nmbufs < nmbclusters * 2)
nmbufs = nmbclusters * 2;
if (nmbcnt == 0)
nmbcnt = EXT_COUNTERS;
}
SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL);
/*
 * Bring up the whole mbuf subsystem: the mb_map submap, the free lists
 * with their lock and condition variables, the exported statistics and a
 * first batch of reference counters, mbufs and clusters.
 *
 * XXX: If ever we have system specific map setups to do, then move them to
 * machdep.c - for now, there is no reason for this stuff to go there.
 */
static void
mbinit(void *dummy)
{
	vm_size_t map_bytes;
	vm_offset_t map_end;

	/*
	 * Size mb_map for every mbuf, cluster and reference counter we may
	 * ever hand out, rounded down to a page multiple, and carve it out
	 * of kmem_map.
	 */
	map_bytes = (vm_size_t)(nmbufs * MSIZE + nmbclusters * MCLBYTES +
	    nmbcnt * sizeof(union mext_refcnt));
	map_bytes = rounddown(map_bytes, PAGE_SIZE);
	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &map_end,
	    map_bytes);
	/* XXX XXX XXX: mb_map->system_map = 1; */

	/*
	 * Start with empty free lists, then set up the lock and the
	 * starvation condition variables that go with them.
	 */
	mmbfree.m_head = NULL;
	mclfree.m_head = NULL;
	mcntfree.m_head = NULL;
	mtx_init(&mbuf_mtx, "mbuf free list lock", MTX_DEF);
	cv_init(&mmbfree.m_starved, "mbuf free list starved cv");
	cv_init(&mclfree.m_starved, "mbuf cluster free list starved cv");

	/*
	 * Fill in the constant part of the sysctl-exported statistics.
	 */
	mbstat.m_msize = MSIZE;
	mbstat.m_mclbytes = MCLBYTES;
	mbstat.m_minclsize = MINCLSIZE;
	mbstat.m_mlen = MLEN;
	mbstat.m_mhlen = MHLEN;

	/*
	 * Prime the free lists.  The allocators are tried in order and the
	 * first failure is fatal.
	 */
	mtx_lock(&mbuf_mtx);
	if (m_alloc_ref(REF_INIT, M_DONTWAIT) == 0 ||
	    m_mballoc(NMB_INIT, M_DONTWAIT) == 0 ||
	    m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
		panic("mbinit: failed to initialize mbuf subsystem!");
	mtx_unlock(&mbuf_mtx);
}
/*
 * Allocate at least nmb reference count structs and place them
 * on the ref cnt free list (mcntfree).
 *
 * The request is rounded up to whole pages, and every counter that fits
 * in the pages obtained is pushed onto the list.  `how' selects the
 * kmem_malloc() flag: M_TRYWAIT maps to M_WAITOK, anything else to
 * M_NOWAIT.
 *
 * Returns 1 on success and 0 if kmem_malloc() returned NULL.
 *
 * Must be called with mbuf_mtx held.  The mutex is dropped around the
 * kmem_malloc() call and is held again on return, on both the success
 * and the failure path.
 */
int
m_alloc_ref(u_int nmb, int how)
{
caddr_t p;
u_int nbytes;
int i;
/*
 * We don't cap the amount of memory that can be used
 * by the reference counters, like we do for mbufs and
 * mbuf clusters. In fact, we're absolutely sure that we
 * won't ever be going over our allocated space. We keep enough
 * space in mb_map to accommodate maximum values of allocatable
 * external buffers including, but not limited to, clusters.
 * (That's also why we won't have to have wait routines for
 * counters).
 *
 * If we're in here, we're absolutely certain to be returning
 * successfully, as long as there is physical memory to accommodate
 * us. And if there isn't, but we're willing to wait, then
 * kmem_malloc() will do the only waiting needed.
 */
nbytes = round_page(nmb * sizeof(union mext_refcnt));
/* The mutex is currently dropped unconditionally (see the XXX). */
if (1 /* XXX: how == M_TRYWAIT */)
mtx_unlock(&mbuf_mtx);
if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
M_WAITOK : M_NOWAIT)) == NULL) {
/* Restore the caller's locking state before reporting failure. */
if (1 /* XXX: how == M_TRYWAIT */)
mtx_lock(&mbuf_mtx);
return (0);
}
/* Use every counter that fits in the pages we actually got. */
nmb = nbytes / sizeof(union mext_refcnt);
/*
 * Re-take the mutex before touching the free list.  We return with
 * it held; it is up to the caller to let go of the mutex.
 */
if (1 /* XXX: how == M_TRYWAIT */)
mtx_lock(&mbuf_mtx);
/*
 * Push each new counter onto the head of the free list.
 * NOTE(review): i is an int while nmb is a u_int, so the loop
 * condition is evaluated as an unsigned comparison.
 */
for (i = 0; i < nmb; i++) {
((union mext_refcnt *)p)->next_ref = mcntfree.m_head;
mcntfree.m_head = (union mext_refcnt *)p;
p += sizeof(union mext_refcnt);
mbstat.m_refree++;
}
mbstat.m_refcnt += nmb;
return (1);
}
/*
 * Allocate at least nmb mbufs and place them on the mbuf free list
 * (mmbfree).
 *
 * The request is rounded up to whole pages and nmb is recomputed as the
 * number of mbufs that fit in them.  `how' selects the kmem_malloc()
 * flag: M_TRYWAIT maps to M_WAITOK, anything else to M_NOWAIT.
 *
 * Returns 1 on success.  Returns 0 without allocating if mb_map is full
 * or the nmbufs limit would be exceeded, and 0 if kmem_malloc() returned
 * NULL (in which case mbstat.m_wait is also incremented).
 *
 * Must be called with mbuf_mtx held.  The mutex is dropped around the
 * kmem_malloc() call and is held again on return.
 */
int
m_mballoc(int nmb, int how)
{
caddr_t p;
int i;
int nbytes;
nbytes = round_page(nmb * MSIZE);
nmb = nbytes / MSIZE;
/*
 * If we've hit the mbuf limit, stop allocating from mb_map.
 * Also, once we run out of map space, it will be impossible to
 * get any more (nothing is ever freed back to the map).
 */
if (mb_map_full || ((nmb + mbstat.m_mbufs) > nmbufs))
return (0);
/* The mutex is currently dropped unconditionally (see the XXX). */
if (1 /* XXX: how == M_TRYWAIT */)
mtx_unlock(&mbuf_mtx);
p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
M_WAITOK : M_NOWAIT);
/* Re-take the mutex; the statistics update below needs it. */
if (1 /* XXX: how == M_TRYWAIT */) {
mtx_lock(&mbuf_mtx);
if (p == NULL)
mbstat.m_wait++;
}
/*
 * Either the map is now full, or `how' is M_DONTWAIT and there
 * are no pages left.
 */
if (p == NULL)
return (0);
/*
 * The mutex is held again at this point and stays held on return.
 * It is up to the caller to let go of the mutex when done
 * with grabbing the mbuf from the free list.
 */
for (i = 0; i < nmb; i++) {
((struct mbuf *)p)->m_next = mmbfree.m_head;
mmbfree.m_head = (struct mbuf *)p;
p += MSIZE;
}
/* All of the new mbufs start out counted as MT_FREE. */
mbstat.m_mbufs += nmb;
mbtypes[MT_FREE] += nmb;
return (1);
}
/*
 * Once the mb_map has been exhausted and if the call to the allocation macros
 * (or, in some cases, functions) is with M_TRYWAIT, then it is necessary to
 * rely solely on reclaimed mbufs.
 *
 * Here we request for the protocols to free up some resources and, if we
 * still cannot get anything, then we wait for an mbuf to be freed for a
 * designated (mbuf_wait) time, at most.
 *
 * Returns the mbuf obtained, or NULL if none became available (including
 * when the wait timed out).
 *
 * Must be called with mbuf_mtx held; it is dropped while the protocols
 * are drained and is held again on return.
 */
struct mbuf *
m_mballoc_wait(void)
{
struct mbuf *p = NULL;
/*
 * See if we can drain some resources out of the protocols.
 * We drop the mbuf_mtx mutex to avoid recursing into it in some of
 * the drain routines. Clearly, we're faced with a race here because
 * once something is freed during the drain, it may be grabbed right
 * from under us by some other thread. But we accept this possibility
 * in order to avoid a potentially large lock recursion and, more
 * importantly, to avoid a potential lock order reversal which may
 * result in deadlock (See comment above m_reclaim()).
 */
mtx_unlock(&mbuf_mtx);
m_reclaim();
mtx_lock(&mbuf_mtx);
/* First try: pick up anything the drain routines just freed. */
_MGET(p, M_DONTWAIT);
if (p == NULL) {
int retval;
/*
 * m_mballoc_wid counts the threads sleeping here; MBWAKEUP() only
 * signals the cv while it is non-zero.
 */
m_mballoc_wid++;
retval = cv_timedwait(&mmbfree.m_starved, &mbuf_mtx,
mbuf_wait);
m_mballoc_wid--;
/*
 * If we got signaled (i.e. didn't time out), allocate.
 */
if (retval == 0)
_MGET(p, M_DONTWAIT);
}
if (p != NULL) {
mbstat.m_wait++;
/* Mbufs are still free: pass the wakeup on to another waiter. */
if (mmbfree.m_head != NULL)
MBWAKEUP(m_mballoc_wid, &mmbfree.m_starved);
}
return (p);
}
/*
 * Allocate some number of mbuf clusters
 * and place on cluster free list (mclfree).
 *
 * The request is rounded up to whole pages and ncl is recomputed as the
 * number of clusters that fit in them.  `how' selects the kmem_malloc()
 * flag: M_TRYWAIT maps to M_WAITOK, anything else to M_NOWAIT.
 *
 * Returns 1 on success.  Returns 0 without allocating if mb_map is full
 * or the nmbclusters limit would be exceeded, and 0 if kmem_malloc()
 * returned NULL.
 *
 * Must be called with mbuf_mtx held.  The mutex is dropped around the
 * kmem_malloc() call and is held again on return.
 */
int
m_clalloc(int ncl, int how)
{
caddr_t p;
int i;
int npg_sz;
npg_sz = round_page(ncl * MCLBYTES);
ncl = npg_sz / MCLBYTES;
/*
 * If the map is now full (nothing will ever be freed to it).
 * If we've hit the mcluster number limit, stop allocating from
 * mb_map.
 */
if (mb_map_full || ((ncl + mbstat.m_clusters) > nmbclusters))
return (0);
/* The mutex is currently dropped unconditionally (see the XXX). */
if (1 /* XXX: how == M_TRYWAIT */)
mtx_unlock(&mbuf_mtx);
p = (caddr_t)kmem_malloc(mb_map, npg_sz,
how == M_TRYWAIT ? M_WAITOK : M_NOWAIT);
/* Re-take the mutex before looking at or changing the free list. */
if (1 /* XXX: how == M_TRYWAIT */)
mtx_lock(&mbuf_mtx);
/*
 * Either the map is now full, or `how' is M_DONTWAIT and there
 * are no pages left.
 */
if (p == NULL)
return (0);
/* Push each new cluster onto the head of the free list. */
for (i = 0; i < ncl; i++) {
((union mcluster *)p)->mcl_next = mclfree.m_head;
mclfree.m_head = (union mcluster *)p;
p += MCLBYTES;
mbstat.m_clfree++;
}
mbstat.m_clusters += ncl;
return (1);
}
/*
 * Once the mb_map submap has been exhausted and the allocation is called with
 * M_TRYWAIT, we rely on the mclfree list. If nothing is free, we will
 * block on a cv for a designated amount of time (mbuf_wait) or until we're
 * signaled due to sudden mcluster availability.
 *
 * Returns the cluster obtained, or NULL if the wait timed out or the
 * retry found nothing.
 *
 * Must be called with mbuf_mtx held (it is the mutex handed to
 * cv_timedwait()).
 */
caddr_t
m_clalloc_wait(void)
{
caddr_t p = NULL;
int retval;
/*
 * m_clalloc_wid counts the threads sleeping here; MBWAKEUP() only
 * signals the cv while it is non-zero.
 */
m_clalloc_wid++;
retval = cv_timedwait(&mclfree.m_starved, &mbuf_mtx, mbuf_wait);
m_clalloc_wid--;
/*
 * Now that we (think) that we've got something, try again.
 * The retry only happens if we were signaled (retval == 0).
 */
if (retval == 0)
_MCLALLOC(p, M_DONTWAIT);
if (p != NULL) {
mbstat.m_wait++;
/* Clusters are still free: pass the wakeup on to another waiter. */
if (mclfree.m_head != NULL)
MBWAKEUP(m_clalloc_wid, &mclfree.m_starved);
}
return (p);
}
/*
* m_reclaim: drain protocols in hopes to free up some resources...
*
* XXX: No locks should be held going in here. The drain routines have
* to presently acquire some locks which raises the possibility of lock
* order violation if we're holding any mutex if that mutex is acquired in
* reverse order relative to one of the locks in the drain routines.
*/
static void
m_reclaim(void)
{
struct domain *dp;
struct protosw *pr;
#ifdef WITNESS
KASSERT(witness_list(curproc) == 0,
("m_reclaim called with locks held"));
#endif
for (dp = domains; dp; dp = dp->dom_next)
for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
if (pr->pr_drain)
(*pr->pr_drain)();
mbstat.m_drain++;
}
/*
* Space allocation routines.
* Some of these are also available as macros
* for critical paths.
*/
/*
 * Function form of MGET(): hand back whatever the macro stored for an
 * allocation of the given `type', made according to `how'.
 */
struct mbuf *
m_get(int how, int type)
{
	struct mbuf *mb;

	MGET(mb, how, type);
	return (mb);
}
/*
 * Function form of MGETHDR(): hand back whatever the macro stored for a
 * packet-header allocation of the given `type', made according to `how'.
 */
struct mbuf *
m_gethdr(int how, int type)
{
	struct mbuf *mb;

	MGETHDR(mb, how, type);
	return (mb);
}
/*
 * Like m_get(), but the MLEN bytes of the mbuf's data area are zeroed
 * before it is returned.  Returns NULL if MGET() produced no mbuf.
 */
struct mbuf *
m_getclr(int how, int type)
{
	struct mbuf *mb;

	MGET(mb, how, type);
	if (mb == NULL)
		return (NULL);
	bzero(mtod(mb, caddr_t), MLEN);
	return (mb);
}
/*
 * Function form of MFREE(): free the single mbuf `m' and return the
 * successor that the macro reports.
 */
struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *successor;

	MFREE(m, successor);
	return (successor);
}
/*
* struct mbuf *
@ -672,17 +229,13 @@ m_copym(struct mbuf *m, int off0, int len, int wait)
m = m->m_next;
np = &n->m_next;
}
if (top == NULL) {
mtx_lock(&mbuf_mtx);
mbstat.m_mcfail++;
mtx_unlock(&mbuf_mtx);
}
if (top == NULL)
mbstat.m_mcfail++; /* XXX: No consistency. */
return (top);
nospace:
m_freem(top);
mtx_lock(&mbuf_mtx);
mbstat.m_mcfail++;
mtx_unlock(&mbuf_mtx);
mbstat.m_mcfail++; /* XXX: No consistency. */
return (NULL);
}
@ -741,9 +294,7 @@ m_copypacket(struct mbuf *m, int how)
return top;
nospace:
m_freem(top);
mtx_lock(&mbuf_mtx);
mbstat.m_mcfail++;
mtx_unlock(&mbuf_mtx);
mbstat.m_mcfail++; /* XXX: No consistency. */
return (NULL);
}
@ -844,9 +395,7 @@ m_dup(struct mbuf *m, int how)
nospace:
m_freem(top);
mtx_lock(&mbuf_mtx);
mbstat.m_mcfail++;
mtx_unlock(&mbuf_mtx);
mbstat.m_mcfail++; /* XXX: No consistency. */
return (NULL);
}
@ -1008,9 +557,7 @@ m_pullup(struct mbuf *n, int len)
return (m);
bad:
m_freem(n);
mtx_lock(&mbuf_mtx);
mbstat.m_mpfail++;
mtx_unlock(&mbuf_mtx);
mbstat.m_mpfail++; /* XXX: No consistency. */
return (NULL);
}
@ -1157,7 +704,7 @@ m_copyback(struct mbuf *m0, int off, int len, caddr_t cp)
off -= mlen;
totlen += mlen;
if (m->m_next == NULL) {
n = m_getclr(M_DONTWAIT, m->m_type);
n = m_get_clrd(M_DONTWAIT, m->m_type);
if (n == NULL)
goto out;
n->m_len = min(MLEN, len + off);

View File

@ -37,42 +37,20 @@
#ifndef _SYS_MBUF_H_
#define _SYS_MBUF_H_
#ifdef _KERNEL
#include <sys/condvar.h> /* XXX */
#include <sys/_lock.h>
#include <sys/_mutex.h>
#endif /* _KERNEL */
/*
* Mbufs are of a single size, MSIZE (machine/param.h), which
* includes overhead. An mbuf may add a single "mbuf cluster" of size
* MCLBYTES (also in machine/param.h), which has no additional overhead
* and is used instead of the internal data area; this is done when
* at least MINCLSIZE of data must be stored.
* at least MINCLSIZE of data must be stored. Additionally, it is possible
* to allocate a separate buffer externally and attach it to the mbuf in
* a way similar to that of mbuf clusters.
*/
#define MLEN (MSIZE - sizeof(struct m_hdr)) /* normal data len */
#define MHLEN (MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */
#define MINCLSIZE (MHLEN + 1) /* smallest amount to put in cluster */
#define M_MAXCOMPRESS (MHLEN / 2) /* max amount to copy for compression */
/*
* Maximum number of allocatable counters for external buffers. This
* ensures enough VM address space for the allocation of counters
* in the extreme case where all possible external buffers are allocated.
*
* Note: When new types of external storage are allocated, EXT_COUNTERS
* must be tuned accordingly. Practically, this isn't a big deal
* as each counter is only a word long, so we can fit
* (PAGE_SIZE / length of word) counters in a single page.
*
* XXX: Must increase this if using any of if_ti, if_wb, if_sk drivers,
* or any other drivers which may manage their own buffers and
* eventually attach them to mbufs.
*/
#define EXT_COUNTERS (nmbclusters + nsfbufs)
#ifdef _KERNEL
/*
* Macros for type conversion
@ -83,7 +61,9 @@
#define dtom(x) ((struct mbuf *)((intptr_t)(x) & ~(MSIZE-1)))
#endif /* _KERNEL */
/* header at beginning of each mbuf: */
/*
* Header present at the beginning of every mbuf.
*/
struct m_hdr {
struct mbuf *mh_next; /* next buffer in chain */
struct mbuf *mh_nextpkt; /* next chain in queue/record */
@ -93,7 +73,9 @@ struct m_hdr {
short mh_flags; /* flags; see below */
};
/* record/packet header in first mbuf of chain; valid if M_PKTHDR set */
/*
* Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set.
*/
struct pkthdr {
struct ifnet *rcvif; /* rcv interface */
int len; /* total packet length */
@ -105,17 +87,23 @@ struct pkthdr {
struct mbuf *aux; /* extra data buffer; ipsec/others */
};
/* description of external storage mapped into mbuf, valid if M_EXT set */
/*
* Description of external storage mapped into mbuf; valid only if M_EXT is set.
*/
struct m_ext {
caddr_t ext_buf; /* start of buffer */
void (*ext_free) /* free routine if not the usual */
(caddr_t, void *);
void *ext_args; /* optional argument pointer */
u_int ext_size; /* size of buffer, for ext_free */
union mext_refcnt *ref_cnt; /* pointer to ref count info */
u_int *ref_cnt; /* pointer to ref count info */
int ext_type; /* type of external storage */
};
/*
* The core of the mbuf object along with some shortcut defines for
* practical purposes.
*/
struct mbuf {
struct m_hdr m_hdr;
union {
@ -141,7 +129,9 @@ struct mbuf {
#define m_pktdat M_dat.MH.MH_dat.MH_databuf
#define m_dat M_dat.M_databuf
/* mbuf flags */
/*
* mbuf flags
*/
#define M_EXT 0x0001 /* has associated external storage */
#define M_PKTHDR 0x0002 /* start of record */
#define M_EOR 0x0004 /* end of record */
@ -152,24 +142,32 @@ struct mbuf {
#define M_PROTO4 0x0080 /* protocol-specific */
#define M_PROTO5 0x0100 /* protocol-specific */
/* mbuf pkthdr flags, also in m_flags */
/*
* mbuf pkthdr flags (also stored in m_flags)
*/
#define M_BCAST 0x0200 /* send/received as link-level broadcast */
#define M_MCAST 0x0400 /* send/received as link-level multicast */
#define M_FRAG 0x0800 /* packet is a fragment of a larger packet */
#define M_FIRSTFRAG 0x1000 /* packet is first fragment */
#define M_LASTFRAG 0x2000 /* packet is last fragment */
/* external buffer types: identify ext_buf type */
/*
* External buffer types: identify ext_buf type
*/
#define EXT_CLUSTER 1 /* mbuf cluster */
#define EXT_SFBUF 2 /* sendfile(2)'s sf_bufs */
#define EXT_NET_DRV 100 /* custom ext_buf provided by net driver(s) */
#define EXT_MOD_TYPE 200 /* custom module's ext_buf type */
/* flags copied when copying m_pkthdr */
/*
* Flags copied when copying m_pkthdr
*/
/* Mask of the m_flags bits that M_COPY_PKTHDR() carries over to the copy. */
#define M_COPYFLAGS \
	(M_PKTHDR | M_EOR | M_RDONLY | \
	 M_PROTO1 | M_PROTO2 | M_PROTO3 | M_PROTO4 | M_PROTO5 | \
	 M_BCAST | M_MCAST | M_FRAG)
/* flags indicating hw checksum support and sw checksum requirements */
/*
* Flags indicating hw checksum support and sw checksum requirements
*/
#define CSUM_IP 0x0001 /* will csum IP */
#define CSUM_TCP 0x0002 /* will csum TCP */
#define CSUM_UDP 0x0004 /* will csum UDP */
@ -184,7 +182,9 @@ struct mbuf {
#define CSUM_DELAY_DATA (CSUM_TCP | CSUM_UDP)
#define CSUM_DELAY_IP (CSUM_IP) /* XXX add ipv6 here too? */
/* mbuf types */
/*
* mbuf types
*/
#define MT_FREE 0 /* should be on free list */
#define MT_DATA 1 /* dynamic (data) allocation */
#define MT_HEADER 2 /* packet header */
@ -209,17 +209,26 @@ struct mbuf {
#define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */
/*
* mbuf statistics
* Mbuf and cluster allocation statistics PCPU structure.
*/
/*
 * NOTE(review): the allocator that maintains these fields is not visible
 * here; the per-field notes below are inferred from the field names and
 * should be confirmed against it.
 */
struct mbpstat {
u_long mb_mbfree; /* presumably # of free mbufs in the pool -- confirm */
u_long mb_mbpgs; /* presumably # of pages backing mbufs -- confirm */
u_long mb_clfree; /* presumably # of free clusters in the pool -- confirm */
u_long mb_clpgs; /* presumably # of pages backing clusters -- confirm */
};
/*
* General mbuf statistics structure.
* XXX: Modifications of these are not protected by any mutex locks nor by
* any atomic() manipulations. As a result, we may occasionally lose
* a count or two. Luckily, not all of these fields are modified at all
* and remain static, and those that are manipulated are only manipulated
* in failure situations, which do not occur (hopefully) very often.
*/
struct mbstat {
u_long m_mbufs; /* # mbufs obtained from page pool */
u_long m_clusters; /* # clusters obtained from page pool */
u_long m_clfree; /* # clusters on freelist (cache) */
u_long m_refcnt; /* # ref counters obtained from page pool */
u_long m_refree; /* # ref counters on freelist (cache) */
u_long m_spare; /* spare field */
u_long m_drops; /* times failed to find space */
u_long m_wait; /* times waited for space */
u_long m_drops; /* times failed to allocate */
u_long m_wait; /* times succesfully returned from wait */
u_long m_drain; /* times drained protocols for space */
u_long m_mcfail; /* times m_copym failed */
u_long m_mpfail; /* times m_pullup failed */
@ -230,329 +239,75 @@ struct mbstat {
u_long m_mhlen; /* length of data in a header mbuf */
};
/* flags to m_get/MGET */
/*
* Flags specifying how an allocation should be made.
* M_DONTWAIT means "don't block if nothing is available" whereas
* M_TRYWAIT means "block for mbuf_wait ticks at most if nothing is
* available."
*/
#define M_DONTWAIT 1
#define M_TRYWAIT 0
#define M_WAIT M_TRYWAIT /* XXX: Deprecated. */
/*
* Normal mbuf clusters are normally treated as character arrays
* after allocation, but use the first word of the buffer as a free list
* pointer while on the free list.
*/
union mcluster {
union mcluster *mcl_next; /* link to next cluster while on the free list */
char mcl_buf[MCLBYTES]; /* the cluster's data area (MCLBYTES bytes) */
};
/*
* The m_ext object reference counter structure.
*/
union mext_refcnt {
union mext_refcnt *next_ref; /* link to next counter while on the free list */
u_int refcnt; /* the reference count itself once handed out */
};
#ifdef _KERNEL
/*
* The freelists for mbufs and mbuf clusters include condition variables
* that are used in cases of depletion/starvation.
* The counter freelist does not require a condition variable as we never
* expect to consume more than the reserved address space for counters.
* All are presently protected by the mbuf_mtx lock.
*/
/* Free list of mbufs. */
struct mbffree_lst {
struct mbuf *m_head; /* first free mbuf; the list is chained via m_next */
struct cv m_starved; /* waited on (with timeout) when no mbuf is free */
};
/* Free list of mbuf clusters. */
struct mclfree_lst {
union mcluster *m_head; /* first free cluster; chained via mcl_next */
struct cv m_starved; /* waited on (with timeout) when no cluster is free */
};
/* Free list of external-object reference counters (no condition variable). */
struct mcntfree_lst {
union mext_refcnt *m_head; /* first free counter; chained via next_ref */
};
/*
 * Signal a single instance (if any) blocked on a m_starved cv (i.e. an
 * instance waiting for an {mbuf, cluster} to be freed to the global
 * cache lists).
 *
 * m_wid is the count of waiters for that cv; cv_signal() is only called
 * when it is greater than zero.  m_cv is a pointer to the cv to signal.
 *
 * Must be called with mbuf_mtx held.
 */
#define MBWAKEUP(m_wid, m_cv) do { \
if ((m_wid) > 0) \
cv_signal((m_cv)); \
} while (0)
/*
* mbuf external reference count management macros:
* mbuf external reference count management macros
*
* MEXT_IS_REF(m): true if (m) is not the only mbuf referencing
* the external buffer ext_buf
*
* MEXT_REM_REF(m): remove reference to m_ext object
*
* MEXT_ADD_REF(m): add reference to m_ext object already
* referred to by (m)
* MEXT_INIT_REF(m): allocate and initialize an external
* object reference counter for (m)
*/
#define MEXT_IS_REF(m) ((m)->m_ext.ref_cnt->refcnt > 1)
#define MEXT_IS_REF(m) (*((m)->m_ext.ref_cnt) > 1)
#define MEXT_REM_REF(m) do { \
KASSERT((m)->m_ext.ref_cnt->refcnt > 0, ("m_ext refcnt < 0")); \
atomic_subtract_int(&((m)->m_ext.ref_cnt->refcnt), 1); \
KASSERT(*((m)->m_ext.ref_cnt) > 0, ("m_ext refcnt < 0")); \
atomic_subtract_int((m)->m_ext.ref_cnt, 1); \
} while(0)
#define MEXT_ADD_REF(m) atomic_add_int(&((m)->m_ext.ref_cnt->refcnt), 1)
/*
 * _MEXT_ALLOC_CNT(m_cnt, how): take one reference counter off the mcntfree
 * list and store its address in m_cnt (NULL if none could be had).
 *
 * Takes and releases mbuf_mtx itself.  If the list is empty it is first
 * refilled with m_alloc_ref(1, how).  A counter that is handed out has its
 * refcnt reset to 0 and mbstat.m_refree is decremented.
 */
#define _MEXT_ALLOC_CNT(m_cnt, how) do { \
union mext_refcnt *__mcnt; \
\
mtx_lock(&mbuf_mtx); \
if (mcntfree.m_head == NULL) \
m_alloc_ref(1, (how)); \
__mcnt = mcntfree.m_head; \
if (__mcnt != NULL) { \
mcntfree.m_head = __mcnt->next_ref; \
mbstat.m_refree--; \
__mcnt->refcnt = 0; \
} \
mtx_unlock(&mbuf_mtx); \
(m_cnt) = __mcnt; \
} while (0)
/*
 * _MEXT_DEALLOC_CNT(m_cnt): return the reference counter m_cnt to the head
 * of the mcntfree list and increment mbstat.m_refree.
 *
 * Takes and releases mbuf_mtx itself.
 */
#define _MEXT_DEALLOC_CNT(m_cnt) do { \
union mext_refcnt *__mcnt = (m_cnt); \
\
mtx_lock(&mbuf_mtx); \
__mcnt->next_ref = mcntfree.m_head; \
mcntfree.m_head = __mcnt; \
mbstat.m_refree++; \
mtx_unlock(&mbuf_mtx); \
} while (0)
/*
 * MEXT_INIT_REF(m, how): allocate a reference counter for (m)'s external
 * object via _MEXT_ALLOC_CNT() and, if one was obtained, add the first
 * reference with MEXT_ADD_REF().
 *
 * On failure (m)->m_ext.ref_cnt is left NULL; callers test for that.
 */
#define MEXT_INIT_REF(m, how) do { \
struct mbuf *__mmm = (m); \
\
_MEXT_ALLOC_CNT(__mmm->m_ext.ref_cnt, (how)); \
if (__mmm->m_ext.ref_cnt != NULL) \
MEXT_ADD_REF(__mmm); \
} while (0)
#define MEXT_ADD_REF(m) atomic_add_int((m)->m_ext.ref_cnt, 1)
/*
* mbuf allocation/deallocation macros:
*
* MGET(struct mbuf *m, int how, int type)
* allocates an mbuf and initializes it to contain internal data.
*
* MGETHDR(struct mbuf *m, int how, int type)
* allocates an mbuf and initializes it to contain a packet header
* and internal data.
* mbuf, cluster, and external object allocation macros
* (for compatibility purposes)
*/
/*
* Lower-level macros for MGET(HDR)... Not to be used outside the
* subsystem ("non-exportable" macro names are prepended with "_").
*/
#define _MGET_SETUP(m_set, m_set_type) do { \
(m_set)->m_type = (m_set_type); \
(m_set)->m_next = NULL; \
(m_set)->m_nextpkt = NULL; \
(m_set)->m_data = (m_set)->m_dat; \
(m_set)->m_flags = 0; \
#define MGET(m, how, type) \
(m) = m_get((how), (type))
#define MGETHDR(m, how, type) \
(m) = m_gethdr((how), (type))
#define MCLGET(m, how) \
m_clget((m), (how))
#define MEXTADD(m, buf, size, free, args, flags, type) \
m_extadd((m), (caddr_t)(buf), (size), (free), (args), (flags), (type))
#define MFREE(m, n) do { \
(n) = m_free((m)); \
(m) = NULL; \
} while (0)
#define _MGET(m_mget, m_get_how) do { \
if (mmbfree.m_head == NULL) \
m_mballoc(1, (m_get_how)); \
(m_mget) = mmbfree.m_head; \
if ((m_mget) != NULL) { \
mmbfree.m_head = (m_mget)->m_next; \
mbtypes[MT_FREE]--; \
} else { \
if ((m_get_how) == M_TRYWAIT) \
(m_mget) = m_mballoc_wait(); \
} \
} while (0)
#define MGET(m, how, type) do { \
struct mbuf *_mm; \
int _mhow = (how); \
int _mtype = (type); \
\
mtx_lock(&mbuf_mtx); \
_MGET(_mm, _mhow); \
if (_mm != NULL) { \
mbtypes[_mtype]++; \
mtx_unlock(&mbuf_mtx); \
_MGET_SETUP(_mm, _mtype); \
} else { \
mbstat.m_drops++; \
mtx_unlock(&mbuf_mtx); \
} \
(m) = _mm; \
} while (0)
#define _MGETHDR_SETUP(m_set, m_set_type) do { \
(m_set)->m_type = (m_set_type); \
(m_set)->m_next = NULL; \
(m_set)->m_nextpkt = NULL; \
(m_set)->m_data = (m_set)->m_pktdat; \
(m_set)->m_flags = M_PKTHDR; \
(m_set)->m_pkthdr.rcvif = NULL; \
(m_set)->m_pkthdr.csum_flags = 0; \
(m_set)->m_pkthdr.aux = NULL; \
} while (0)
#define MGETHDR(m, how, type) do { \
struct mbuf *_mm; \
int _mhow = (how); \
int _mtype = (type); \
\
mtx_lock(&mbuf_mtx); \
_MGET(_mm, _mhow); \
if (_mm != NULL) { \
mbtypes[_mtype]++; \
mtx_unlock(&mbuf_mtx); \
_MGETHDR_SETUP(_mm, _mtype); \
} else { \
mbstat.m_drops++; \
mtx_unlock(&mbuf_mtx); \
} \
(m) = _mm; \
} while (0)
#define m_getclr m_get_clrd
/*
* mbuf external storage macros:
*
* MCLGET allocates and refers an mcluster to an mbuf
* MEXTADD sets up pre-allocated external storage and refers to mbuf
* MEXTFREE removes reference to external object and frees it if
* necessary
*/
#define _MCLALLOC(p, how) do { \
caddr_t _mp; \
int _mhow = (how); \
\
if (mclfree.m_head == NULL) \
m_clalloc(1, _mhow); \
_mp = (caddr_t)mclfree.m_head; \
if (_mp != NULL) { \
mbstat.m_clfree--; \
mclfree.m_head = ((union mcluster *)_mp)->mcl_next; \
} else { \
if (_mhow == M_TRYWAIT) \
_mp = m_clalloc_wait(); \
} \
(p) = _mp; \
} while (0)
#define MCLGET(m, how) do { \
struct mbuf *_mm = (m); \
\
mtx_lock(&mbuf_mtx); \
_MCLALLOC(_mm->m_ext.ext_buf, (how)); \
if (_mm->m_ext.ext_buf != NULL) { \
mtx_unlock(&mbuf_mtx); \
MEXT_INIT_REF(_mm, (how)); \
if (_mm->m_ext.ref_cnt == NULL) { \
_MCLFREE(_mm->m_ext.ext_buf); \
_mm->m_ext.ext_buf = NULL; \
} else { \
_mm->m_data = _mm->m_ext.ext_buf; \
_mm->m_flags |= M_EXT; \
_mm->m_ext.ext_free = NULL; \
_mm->m_ext.ext_args = NULL; \
_mm->m_ext.ext_size = MCLBYTES; \
_mm->m_ext.ext_type = EXT_CLUSTER; \
} \
} else { \
mbstat.m_drops++; \
mtx_unlock(&mbuf_mtx); \
} \
} while (0)
#define MEXTADD(m, buf, size, free, args, flags, type) do { \
struct mbuf *_mm = (m); \
\
MEXT_INIT_REF(_mm, M_TRYWAIT); \
if (_mm->m_ext.ref_cnt != NULL) { \
_mm->m_flags |= (M_EXT | (flags)); \
_mm->m_ext.ext_buf = (caddr_t)(buf); \
_mm->m_data = _mm->m_ext.ext_buf; \
_mm->m_ext.ext_size = (size); \
_mm->m_ext.ext_free = (free); \
_mm->m_ext.ext_args = (args); \
_mm->m_ext.ext_type = (type); \
} \
} while (0)
#define _MCLFREE(p) do { \
union mcluster *_mp = (union mcluster *)(p); \
\
mtx_lock(&mbuf_mtx); \
_mp->mcl_next = mclfree.m_head; \
mclfree.m_head = _mp; \
mbstat.m_clfree++; \
MBWAKEUP(m_clalloc_wid, &mclfree.m_starved); \
mtx_unlock(&mbuf_mtx); \
} while (0)
/* MEXTFREE:
* MEXTFREE(m): disassociate (and possibly free) an external object from `m'
*
* MEXT_REM_REF is applied first. If the atomic_cmpset_int() that follows
* returns 0, then we effectively do nothing in terms of "cleaning up"
* (freeing the ext buf and ref. counter) as this means that either there
* are still references, or another thread is taking care of the clean-up.
*
* When the cmpset succeeds, storage of type EXT_CLUSTER is handed to
* _MCLFREE, while any other type is released by calling
* ext_free(ext_buf, ext_args); the reference counter is then passed to
* _MEXT_DEALLOC_CNT. M_EXT is cleared from m_flags in every case.
*
* NOTE(review): `_mb' is declared below but not referenced anywhere else
* in this macro as shown -- confirm whether that declaration belongs here.
*/
#define MEXTFREE(m) do { \
struct mbuf *_mmm = (m); \
struct mbuf *_mb = (m); \
\
MEXT_REM_REF(_mmm); \
if (atomic_cmpset_int(&_mmm->m_ext.ref_cnt->refcnt, 0, 1)) { \
if (_mmm->m_ext.ext_type != EXT_CLUSTER) { \
(*(_mmm->m_ext.ext_free))(_mmm->m_ext.ext_buf, \
_mmm->m_ext.ext_args); \
} else \
_MCLFREE(_mmm->m_ext.ext_buf); \
_MEXT_DEALLOC_CNT(_mmm->m_ext.ref_cnt); \
} \
_mmm->m_flags &= ~M_EXT; \
} while (0)
/*
* MFREE(struct mbuf *m, struct mbuf *n)
* Free a single mbuf and associated external storage.
* Place the successor, if any, in n.
*
* We do need to check non-first mbufs for m_aux, since some existing
* code does not call M_PREPEND properly.
* (example: call to bpf_mtap from drivers)
*
* Sequence: a KASSERT rejects an mbuf whose type is already MT_FREE; if
* M_EXT is set the external storage is dropped through MEXTFREE. Then,
* with mbuf_mtx held, the mbtypes[] counters are adjusted, any
* m_pkthdr.aux chain (M_PKTHDR mbufs only) is detached, the mbuf is marked
* MT_FREE, `n' receives m_next, the mbuf is pushed onto the head of
* mmbfree and MBWAKEUP(m_mballoc_wid, &mmbfree.m_starved) is invoked.
* The detached aux chain is passed to m_freem() only after mbuf_mtx has
* been released.
*
* NOTE(review): the last four lines of the body use `_mb', which this
* macro never declares, and they run after the mbuf has already been
* placed on the free list. They look like they belong to a different
* macro -- confirm against the original sources before relying on them.
*/
#define MFREE(m, n) do { \
struct mbuf *_mm = (m); \
struct mbuf *_aux; \
\
KASSERT(_mm->m_type != MT_FREE, ("freeing free mbuf")); \
if (_mm->m_flags & M_EXT) \
MEXTFREE(_mm); \
mtx_lock(&mbuf_mtx); \
mbtypes[_mm->m_type]--; \
if ((_mm->m_flags & M_PKTHDR) != 0 && _mm->m_pkthdr.aux) { \
_aux = _mm->m_pkthdr.aux; \
_mm->m_pkthdr.aux = NULL; \
} else { \
_aux = NULL; \
} \
_mm->m_type = MT_FREE; \
mbtypes[MT_FREE]++; \
(n) = _mm->m_next; \
_mm->m_next = mmbfree.m_head; \
mmbfree.m_head = _mm; \
MBWAKEUP(m_mballoc_wid, &mmbfree.m_starved); \
mtx_unlock(&mbuf_mtx); \
if (_aux) \
m_freem(_aux); \
MEXT_REM_REF(_mb); \
if (atomic_cmpset_int(_mb->m_ext.ref_cnt, 0, 1)) \
_mext_free(_mb); \
_mb->m_flags &= ~M_EXT; \
} while (0)
/*
@ -570,8 +325,8 @@ struct mcntfree_lst {
* aux pointer will be moved to `to'.
*/
#define M_COPY_PKTHDR(to, from) do { \
struct mbuf *_mfrom = (from); \
struct mbuf *_mto = (to); \
struct mbuf *_mfrom = (from); \
struct mbuf *_mto = (to); \
\
_mto->m_data = _mto->m_pktdat; \
_mto->m_flags = _mfrom->m_flags & M_COPYFLAGS; \
@ -621,10 +376,10 @@ struct mcntfree_lst {
* set to NULL.
*/
#define M_PREPEND(m, plen, how) do { \
struct mbuf **_mmp = &(m); \
struct mbuf *_mm = *_mmp; \
int _mplen = (plen); \
int __mhow = (how); \
struct mbuf **_mmp = &(m); \
struct mbuf *_mm = *_mmp; \
int _mplen = (plen); \
int __mhow = (how); \
\
if (M_LEADINGSPACE(_mm) >= _mplen) { \
_mm->m_data -= _mplen; \
@ -639,16 +394,7 @@ struct mcntfree_lst {
/*
* change mbuf to new type
*
* MCHTYPE(m, t): both arguments are evaluated once into locals. With
* mbuf_mtx held, the mbtypes[] counter for the mbuf's current type is
* decremented and the counter for `t' is incremented; m_type itself is
* assigned after the mutex has been released.
*/
#define MCHTYPE(m, t) do { \
struct mbuf *_mm = (m); \
int _mt = (t); \
\
mtx_lock(&mbuf_mtx); \
mbtypes[_mm->m_type]--; \
mbtypes[_mt]++; \
mtx_unlock(&mbuf_mtx); \
_mm->m_type = (_mt); \
} while (0)
/*
* MCHTYPE(m, t): set the type of mbuf `m' to `t'.
*
* The whole expansion is parenthesized so that it behaves as one
* expression wherever it is used; without the outer parentheses an
* operator following the macro call would bind to `t' before the
* assignment (e.g. `MCHTYPE(m, t) + 1' would store `t + 1').
*/
#define MCHTYPE(m, t) ((m)->m_type = (t))
/* length to m_copy to copy all */
#define M_COPYALL 1000000000
@ -665,55 +411,46 @@ struct mauxtag {
void* p;
};
extern u_long m_clalloc_wid; /* mbuf cluster wait count */
extern u_long m_mballoc_wid; /* mbuf wait count */
extern int max_datalen; /* MHLEN - max_hdr */
extern int max_hdr; /* largest link+protocol header */
extern int max_linkhdr; /* largest link-level header */
extern int max_protohdr; /* largest protocol header */
extern struct mbstat mbstat;
extern u_long mbtypes[MT_NTYPES]; /* per-type mbuf allocations */
extern int mbuf_wait; /* mbuf sleep time */
extern struct mtx mbuf_mtx;
extern struct mbuf *mbutl; /* virtual address of mclusters */
extern struct mclfree_lst mclfree;
extern struct mcntfree_lst mcntfree;
extern struct mbffree_lst mmbfree;
extern int nmbclusters;
extern int nmbcnt;
extern int nmbufs;
extern int nsfbufs;
extern int max_datalen; /* MHLEN - max_hdr */
extern int max_hdr; /* largest link + protocol header */
extern int max_linkhdr; /* largest link-level header */
extern int max_protohdr; /* largest protocol header */
extern struct mbpstat mb_statpcpu[]; /* Per-CPU allocation stats. */
extern struct mbstat mbstat; /* General mbuf stats/infos. */
extern int nmbclusters; /* Maximum number of clusters */
extern int nmbcnt; /* Scale kmem_map for counter space */
extern int nmbufs; /* Maximum number of mbufs */
extern int nsfbufs; /* Number of sendfile(2) bufs */
void m_adj(struct mbuf *, int);
int m_alloc_ref(u_int, int);
struct mbuf *m_aux_add2 __P((struct mbuf *, int, int, void *));
struct mbuf *m_aux_find2 __P((struct mbuf *, int, int, void *));
struct mbuf *m_aux_add(struct mbuf *, int, int);
void m_aux_delete(struct mbuf *, struct mbuf *);
struct mbuf *m_aux_find(struct mbuf *, int, int);
void m_cat(struct mbuf *, struct mbuf *);
int m_clalloc(int, int);
caddr_t m_clalloc_wait(void);
void m_copyback(struct mbuf *, int, int, caddr_t);
void m_copydata(struct mbuf *, int, int, caddr_t);
struct mbuf *m_copym(struct mbuf *, int, int, int);
struct mbuf *m_copypacket(struct mbuf *, int);
struct mbuf *m_devget(char *, int, int, struct ifnet *,
void (*copy)(char *, caddr_t, u_int));
struct mbuf *m_dup(struct mbuf *, int);
struct mbuf *m_free(struct mbuf *);
void m_freem(struct mbuf *);
struct mbuf *m_get(int, int);
struct mbuf *m_getclr(int, int);
struct mbuf *m_gethdr(int, int);
struct mbuf *m_getm(struct mbuf *, int, int, int);
int m_mballoc(int, int);
struct mbuf *m_mballoc_wait(void);
struct mbuf *m_prepend(struct mbuf *, int, int);
void m_print(const struct mbuf *m);
struct mbuf *m_pulldown(struct mbuf *, int, int, int *);
struct mbuf *m_pullup(struct mbuf *, int);
struct mbuf *m_split(struct mbuf *, int, int);
void m_adj(struct mbuf *, int);
struct mbuf *m_aux_add(struct mbuf *, int, int);
struct mbuf *m_aux_add2(struct mbuf *, int, int, void *);
void m_aux_delete(struct mbuf *, struct mbuf *);
struct mbuf *m_aux_find(struct mbuf *, int, int);
struct mbuf *m_aux_find2(struct mbuf *, int, int, void *);
void m_cat(struct mbuf *, struct mbuf *);
void m_clget(struct mbuf *, int);
void m_extadd(struct mbuf *, caddr_t, u_int,
void (*free)(caddr_t, void *), void *, short, int);
void m_copyback(struct mbuf *, int, int, caddr_t);
void m_copydata(struct mbuf *, int, int, caddr_t);
struct mbuf *m_copym(struct mbuf *, int, int, int);
struct mbuf *m_copypacket(struct mbuf *, int);
struct mbuf *m_devget(char *, int, int, struct ifnet *,
void (*copy)(char *, caddr_t, u_int));
struct mbuf *m_dup(struct mbuf *, int);
struct mbuf *m_free(struct mbuf *);
void m_freem(struct mbuf *);
struct mbuf *m_get(int, int);
struct mbuf *m_get_clrd(int, int);
struct mbuf *m_gethdr(int, int);
struct mbuf *m_gethdr_clrd(int, int);
struct mbuf *m_getm(struct mbuf *, int, int, int);
struct mbuf *m_prepend(struct mbuf *, int, int);
void m_print(const struct mbuf *m);
struct mbuf *m_pulldown(struct mbuf *, int, int, int *);
struct mbuf *m_pullup(struct mbuf *, int);
struct mbuf *m_split(struct mbuf *, int, int);
#endif /* _KERNEL */
#endif /* !_SYS_MBUF_H_ */

View File

@ -402,8 +402,6 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
#define KIPC_MAX_PROTOHDR 5 /* int: max length of network header */
#define KIPC_MAX_HDR 6 /* int: max total length of headers */
#define KIPC_MAX_DATALEN 7 /* int: max length of data? */
#define KIPC_MBSTAT 8 /* struct: mbuf usage statistics */
#define KIPC_NMBCLUSTERS 9 /* int: maximum mbuf clusters */
/*
* CTL_HW identifiers

View File

@ -89,8 +89,6 @@ vm_map_t kmem_map=0;
vm_map_t exec_map=0;
vm_map_t clean_map=0;
vm_map_t buffer_map=0;
vm_map_t mb_map=0;
int mb_map_full=0;
/*
* kmem_alloc_pageable:
@ -331,6 +329,9 @@ kmem_suballoc(parent, min, max, size)
*
* NOTE: This routine is not supposed to block if M_NOWAIT is set, but
* I have not verified that it actually does not block.
*
* `map' is ONLY allowed to be kmem_map or one of the mbuf submaps to
* which we never free.
*/
vm_offset_t
kmem_malloc(map, size, flags)
@ -344,9 +345,6 @@ kmem_malloc(map, size, flags)
vm_page_t m;
int hadvmlock;
if (map != kmem_map && map != mb_map)
panic("kmem_malloc: map != {kmem,mb}_map");
hadvmlock = mtx_owned(&vm_mtx);
if (!hadvmlock)
mtx_lock(&vm_mtx);
@ -362,9 +360,9 @@ kmem_malloc(map, size, flags)
vm_map_lock(map);
if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
vm_map_unlock(map);
if (map == mb_map) {
mb_map_full = TRUE;
printf("Out of mbuf clusters - adjust NMBCLUSTERS or increase maxusers!\n");
if (map != kmem_map) {
printf("Out of mbuf address space!\n");
printf("Consider increasing NMBCLUSTERS\n");
goto bad;
}
if ((flags & M_NOWAIT) == 0)

View File

@ -71,8 +71,6 @@
extern vm_map_t buffer_map;
extern vm_map_t kernel_map;
extern vm_map_t kmem_map;
extern vm_map_t mb_map;
extern int mb_map_full;
extern vm_map_t clean_map;
extern vm_map_t exec_map;
extern u_int vm_kmem_size;

View File

@ -726,14 +726,14 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
mtx_assert(&vm_mtx, MA_OWNED);
start = *addr;
if (map == kmem_map || map == mb_map)
if (map == kmem_map)
s = splvm();
vm_map_lock(map);
if (find_space) {
if (vm_map_findspace(map, start, length, addr)) {
vm_map_unlock(map);
if (map == kmem_map || map == mb_map)
if (map == kmem_map)
splx(s);
return (KERN_NO_SPACE);
}
@ -743,7 +743,7 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
start, start + length, prot, max, cow);
vm_map_unlock(map);
if (map == kmem_map || map == mb_map)
if (map == kmem_map)
splx(s);
return (result);
@ -1951,7 +1951,7 @@ vm_map_remove(map, start, end)
int result, s = 0;
mtx_assert(&vm_mtx, MA_OWNED);
if (map == kmem_map || map == mb_map)
if (map == kmem_map)
s = splvm();
vm_map_lock(map);
@ -1959,7 +1959,7 @@ vm_map_remove(map, start, end)
result = vm_map_delete(map, start, end);
vm_map_unlock(map);
if (map == kmem_map || map == mb_map)
if (map == kmem_map)
splx(s);
return (result);

View File

@ -1696,8 +1696,6 @@ vm_object_in_map( object)
return 1;
if( _vm_object_in_map( buffer_map, object, 0))
return 1;
if( _vm_object_in_map( mb_map, object, 0))
return 1;
return 0;
}

View File

@ -153,6 +153,16 @@ static struct nlist nl[] = {
{ "_nmbclusters" },
#define N_NMBUFS 41
{ "_nmbufs" },
#define N_MBLIM 42
{ "_mbuf_limit" },
#define N_CLLIM 43
{ "_clust_limit" },
#define N_NCPUS 44
{ "_smp_cpus" },
#define N_PAGESZ 45
{ "_pagesize" },
#define N_MBPSTAT 46
{ "_mb_statpcpu" },
{ "" },
};
@ -486,9 +496,14 @@ main(argc, argv)
mbpr(nl[N_MBSTAT].n_value,
nl[N_MBTYPES].n_value,
nl[N_NMBCLUSTERS].n_value,
nl[N_NMBUFS].n_value);
nl[N_NMBUFS].n_value,
nl[N_MBLIM].n_value,
nl[N_CLLIM].n_value,
nl[N_NCPUS].n_value,
nl[N_PAGESZ].n_value,
nl[N_MBPSTAT].n_value);
} else
mbpr(0, 0, 0, 0);
mbpr(0, 0, 0, 0, 0, 0, 0, 0, 0);
exit(0);
}
#if 0

View File

@ -48,11 +48,14 @@ static const char rcsid[] =
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "netstat.h"
#define YES 1
typedef int bool;
/* XXX: mbtypes stats temporarily disabled. */
#if 0
static struct mbtypenames {
int mt_type;
char *mt_name;
@ -89,24 +92,30 @@ static struct mbtypenames {
#endif
{ 0, 0 }
};
#endif /* 0 */
/*
* Print mbuf statistics.
*/
void
mbpr(u_long mbaddr, u_long mbtaddr, u_long nmbcaddr, u_long nmbufaddr)
mbpr(u_long mbaddr, u_long mbtaddr, u_long nmbcaddr, u_long nmbufaddr,
u_long mblimaddr, u_long cllimaddr, u_long cpusaddr, u_long pgsaddr,
u_long mbpaddr)
{
u_long totmem, totpossible, totmbufs;
register int i;
struct mbstat mbstat;
struct mbtypenames *mp;
int name[3], nmbclusters, nmbufs, nmbcnt, nmbtypes;
size_t nmbclen, nmbuflen, nmbcntlen, mbstatlen, mbtypeslen;
u_long *mbtypes;
bool *seen; /* "have we seen this type yet?" */
int i, nmbufs, nmbclusters, ncpu, page_size, num_objs;
u_int mbuf_limit, clust_limit;
u_long totspace, totnum, totfree;
size_t mlen;
struct mbstat *mbstat = NULL;
struct mbpstat **mbpstat = NULL;
mbtypes = NULL;
seen = NULL;
/* XXX: mbtypes stats temporarily disabled. */
#if 0
int nmbtypes;
size_t mbtypeslen;
struct mbtypenames *mp;
u_long *mbtypes = NULL;
bool *seen = NULL;
/*
* XXX
@ -127,92 +136,182 @@ mbpr(u_long mbaddr, u_long mbtaddr, u_long nmbcaddr, u_long nmbufaddr)
warn("calloc");
goto err;
}
#endif
/*
* mbstat is a pointer (struct mbstat *), so the buffer has to be sized
* from the structure it points to; `sizeof mbstat' would only give the
* size of the pointer itself.
*/
mlen = sizeof *mbstat;
if ((mbstat = malloc(mlen)) == NULL) {
warn("malloc: cannot allocate memory for mbstat");
goto err;
}
/*
* XXX: Unfortunately, for the time being, we have to fetch
* the total length of the per-CPU stats area via sysctl
* (regardless of whether we're looking at a core or not).
*/
if (sysctlbyname("kern.ipc.mb_statpcpu", NULL, &mlen, NULL, 0) < 0) {
warn("sysctl: retrieving mb_statpcpu len");
goto err;
}
num_objs = (int)(mlen / sizeof(struct mbpstat));
if ((mbpstat = calloc(num_objs, sizeof(struct mbpstat *))) == NULL) {
warn("calloc: cannot allocate memory for mbpstats pointers");
goto err;
}
if ((mbpstat[0] = calloc(num_objs, sizeof(struct mbpstat))) == NULL) {
warn("calloc: cannot allocate memory for mbpstats");
goto err;
}
if (mbaddr) {
if (kread(mbaddr, (char *)&mbstat, sizeof mbstat))
if (kread(mbpaddr, (char *)mbpstat[0], mlen))
goto err;
/* Read the whole structure into the buffer, not just pointer-size bytes. */
if (kread(mbaddr, (char *)mbstat, sizeof *mbstat))
goto err;
#if 0
if (kread(mbtaddr, (char *)mbtypes, mbtypeslen))
goto err;
#endif
if (kread(nmbcaddr, (char *)&nmbclusters, sizeof(int)))
goto err;
if (kread(nmbufaddr, (char *)&nmbufs, sizeof(int)))
goto err;
if (kread(mblimaddr, (char *)&mbuf_limit, sizeof(u_int)))
goto err;
if (kread(cllimaddr, (char *)&clust_limit, sizeof(u_int)))
goto err;
if (kread(cpusaddr, (char *)&ncpu, sizeof(int)))
goto err;
if (kread(pgsaddr, (char *)&page_size, sizeof(int)))
goto err;
} else {
name[0] = CTL_KERN;
name[1] = KERN_IPC;
name[2] = KIPC_MBSTAT;
mbstatlen = sizeof mbstat;
if (sysctl(name, 3, &mbstat, &mbstatlen, 0, 0) < 0) {
if (sysctlbyname("kern.ipc.mb_statpcpu", mbpstat[0], &mlen,
NULL, 0) < 0) {
warn("sysctl: retrieving mb_statpcpu");
goto err;
}
/*
* mlen still holds the length of the per-CPU stats area at this
* point; reset it to the size of the mbstat buffer so the kernel is
* not told the buffer is larger than it really is.
*/
mlen = sizeof *mbstat;
if (sysctlbyname("kern.ipc.mbstat", mbstat, &mlen, NULL, 0)
< 0) {
warn("sysctl: retrieving mbstat");
goto err;
}
#if 0
if (sysctlbyname("kern.ipc.mbtypes", mbtypes, &mbtypeslen, NULL,
0) < 0) {
warn("sysctl: retrieving mbtypes");
goto err;
}
name[2] = KIPC_NMBCLUSTERS;
nmbclen = sizeof(int);
if (sysctl(name, 3, &nmbclusters, &nmbclen, 0, 0) < 0) {
#endif
mlen = sizeof(int);
if (sysctlbyname("kern.ipc.nmbclusters", &nmbclusters, &mlen,
NULL, 0) < 0) {
warn("sysctl: retrieving nmbclusters");
goto err;
}
nmbuflen = sizeof(int);
if (sysctlbyname("kern.ipc.nmbufs", &nmbufs, &nmbuflen, 0, 0) < 0) {
mlen = sizeof(int);
if (sysctlbyname("kern.ipc.nmbufs", &nmbufs, &mlen, NULL, 0)
< 0) {
warn("sysctl: retrieving nmbufs");
goto err;
}
mlen = sizeof(u_int);
if (sysctlbyname("kern.ipc.mbuf_limit", &mbuf_limit, &mlen,
NULL, 0) < 0) {
warn("sysctl: retrieving mbuf_limit");
goto err;
}
mlen = sizeof(u_int);
if (sysctlbyname("kern.ipc.clust_limit", &clust_limit, &mlen,
NULL, 0) < 0) {
warn("sysctl: retrieving clust_limit");
goto err;
}
mlen = sizeof(int);
if (sysctlbyname("kern.smp.cpus", &ncpu, &mlen, NULL, 0) < 0) {
warn("sysctl: retrieving kern.smp.cpus");
goto err;
}
mlen = sizeof(int);
if (sysctlbyname("hw.pagesize", &page_size, &mlen, NULL, 0)
< 0) {
warn("sysctl: retrieving hw.pagesize");
goto err;
}
}
nmbcntlen = sizeof(int);
if (sysctlbyname("kern.ipc.nmbcnt", &nmbcnt, &nmbcntlen, 0, 0) < 0) {
warn("sysctl: retrieving nmbcnt");
goto err;
}
for (i = 0; i < num_objs; i++)
mbpstat[i] = mbpstat[0] + i;
#undef MSIZE
#define MSIZE (mbstat.m_msize)
#define MSIZE (mbstat->m_msize)
#undef MCLBYTES
#define MCLBYTES (mbstat.m_mclbytes)
#define MCLBYTES (mbstat->m_mclbytes)
#define MBPERPG (page_size / MSIZE)
#define CLPERPG (page_size / MCLBYTES)
#define GENLST (num_objs - 1)
totmbufs = 0;
for (mp = mbtypenames; mp->mt_name; mp++)
totmbufs += mbtypes[mp->mt_type];
printf("%lu/%lu/%u mbufs in use (current/peak/max):\n", totmbufs,
mbstat.m_mbufs, nmbufs);
for (mp = mbtypenames; mp->mt_name; mp++)
if (mbtypes[mp->mt_type]) {
seen[mp->mt_type] = YES;
printf("\t%lu mbufs allocated to %s\n",
mbtypes[mp->mt_type], mp->mt_name);
}
seen[MT_FREE] = YES;
for (i = 0; i < nmbtypes; i++)
if (!seen[i] && mbtypes[i]) {
printf("\t%lu mbufs allocated to <mbuf type %d>\n",
mbtypes[i], i);
}
printf("%lu/%lu/%u mbuf clusters in use (current/peak/max)\n",
mbstat.m_clusters - mbstat.m_clfree, mbstat.m_clusters,
nmbclusters);
printf("%lu/%lu m_ext reference counters (in use/allocated)\n",
mbstat.m_refcnt - mbstat.m_refree, mbstat.m_refcnt);
totmem = mbstat.m_mbufs * MSIZE + mbstat.m_clusters * MCLBYTES +
mbstat.m_refcnt * sizeof(union mext_refcnt);
totpossible = nmbclusters * MCLBYTES + nmbufs * MSIZE +
nmbcnt * sizeof(union mext_refcnt);
printf("%lu Kbytes allocated to network (%lu%% of mb_map in use)\n",
totmem / 1024, (totmem * 100) / totpossible);
printf("%lu requests for memory denied\n", mbstat.m_drops);
printf("%lu requests for memory delayed\n", mbstat.m_wait);
printf("%lu calls to protocol drain routines\n", mbstat.m_drain);
printf("mbuf usage:\n");
printf("\tGEN list:\t%lu/%lu (in use/in pool)\n",
(mbpstat[GENLST]->mb_mbpgs * MBPERPG - mbpstat[GENLST]->mb_mbfree),
(mbpstat[GENLST]->mb_mbpgs * MBPERPG));
totnum = mbpstat[GENLST]->mb_mbpgs * MBPERPG;
totfree = mbpstat[GENLST]->mb_mbfree;
totspace = mbpstat[GENLST]->mb_mbpgs * page_size;
for (i = 0; i < ncpu; i++) {
printf("\tCPU #%d list:\t%lu/%lu (in use/in pool)\n", i,
(mbpstat[i]->mb_mbpgs * MBPERPG - mbpstat[i]->mb_mbfree),
(mbpstat[i]->mb_mbpgs * MBPERPG));
totspace += mbpstat[i]->mb_mbpgs * page_size;
totnum += mbpstat[i]->mb_mbpgs * MBPERPG;
totfree += mbpstat[i]->mb_mbfree;
}
printf("\tTotal:\t\t%lu/%lu (in use/in pool)\n", (totnum - totfree),
totnum);
printf("\tMaximum number allowed on each CPU list: %d\n", mbuf_limit);
printf("\tMaximum possible: %d\n", nmbufs);
printf("\t%lu%% of mbuf map consumed\n", ((totspace * 100) / (nmbufs
* MSIZE)));
printf("mbuf cluster usage:\n");
printf("\tGEN list:\t%lu/%lu (in use/in pool)\n",
(mbpstat[GENLST]->mb_clpgs * CLPERPG - mbpstat[GENLST]->mb_clfree),
(mbpstat[GENLST]->mb_clpgs * CLPERPG));
totnum = mbpstat[GENLST]->mb_clpgs * CLPERPG;
totfree = mbpstat[GENLST]->mb_clfree;
totspace = mbpstat[GENLST]->mb_clpgs * page_size;
for (i = 0; i < ncpu; i++) {
printf("\tCPU #%d list:\t%lu/%lu (in use/in pool)\n", i,
(mbpstat[i]->mb_clpgs * CLPERPG - mbpstat[i]->mb_clfree),
(mbpstat[i]->mb_clpgs * CLPERPG));
totspace += mbpstat[i]->mb_clpgs * page_size;
totnum += mbpstat[i]->mb_clpgs * CLPERPG;
totfree += mbpstat[i]->mb_clfree;
}
printf("\tTotal:\t\t%lu/%lu (in use/in pool)\n", (totnum - totfree),
totnum);
printf("\tMaximum number allowed on each CPU list: %d\n", clust_limit);
printf("\tMaximum possible: %d\n", nmbclusters);
printf("\t%lu%% of cluster map consumed\n", ((totspace * 100) /
(nmbclusters * MCLBYTES)));
printf("%lu requests for memory denied\n", mbstat->m_drops);
printf("%lu requests for memory delayed\n", mbstat->m_wait);
printf("%lu calls to protocol drain routines\n", mbstat->m_drain);
err:
#if 0
if (mbtypes != NULL)
free(mbtypes);
if (seen != NULL)
free(seen);
#endif
if (mbstat != NULL)
free(mbstat);
if (mbpstat != NULL) {
if (mbpstat[0] != NULL)
free(mbpstat[0]);
free(mbpstat);
}
return;
}

View File

@ -95,7 +95,8 @@ void pfkey_stats (u_long, char *, int);
void bdg_stats (u_long, char *, int);
void mbpr (u_long, u_long, u_long, u_long);
void mbpr (u_long, u_long, u_long, u_long, u_long, u_long,
u_long, u_long, u_long);
void hostpr (u_long, u_long);
void impstats (u_long, u_long);

View File

@ -49,7 +49,12 @@ static const char rcsid[] =
#include "systat.h"
#include "extern.h"
static struct mbstat *mb;
static struct mbpstat **mbpstat;
static int num_objs, ncpu;
#define GENLST (num_objs - 1)
/* XXX: mbtypes stats temporarily disabled. */
#if 0
static u_long *m_mbtypes;
static int nmbtypes;
@ -66,6 +71,7 @@ static struct mtnames {
};
#define NNAMES (sizeof (mtnames) / sizeof (mtnames[0]))
#endif
WINDOW *
openmbufs()
@ -95,12 +101,13 @@ labelmbufs()
void
showmbufs()
{
register int i, j, max, index;
int i, j, max, index;
u_long totfree;
char buf[10];
char *mtname;
if (mb == 0)
return;
/* XXX: mbtypes stats temporarily disabled (will be back soon!) */
#if 0
for (j = 0; j < wnd->_maxy; j++) {
max = 0, index = -1;
for (i = 0; i < wnd->_maxy; i++) {
@ -135,19 +142,27 @@ showmbufs()
while (max--)
waddch(wnd, 'X');
wclrtoeol(wnd);
mb->m_mbufs -= m_mbtypes[index];
m_mbtypes[index] = 0;
}
if (mb->m_mbufs) {
#endif
/*
* Print total number of free mbufs.
*/
totfree = mbpstat[GENLST]->mb_mbfree;
for (i = 0; i < ncpu; i++)
totfree += mbpstat[i]->mb_mbfree;
j = 0; /* XXX */
if (totfree > 0) {
mvwprintw(wnd, 1+j, 0, "%-10.10s", "free");
if (mb->m_mbufs > 60) {
snprintf(buf, sizeof(buf), " %ld", mb->m_mbufs);
mb->m_mbufs = 60;
while (mb->m_mbufs--)
if (totfree > 60) {
snprintf(buf, sizeof(buf), " %lu", totfree);
totfree = 60;
while(totfree--)
waddch(wnd, 'X');
waddstr(wnd, buf);
} else {
while(mb->m_mbufs--)
while(totfree--)
waddch(wnd, 'X');
}
wclrtoeol(wnd);
@ -159,7 +174,10 @@ showmbufs()
int
initmbufs()
{
size_t len, mbtypeslen;
int i;
size_t len;
#if 0
size_t mbtypeslen;
if (sysctlbyname("kern.ipc.mbtypes", NULL, &mbtypeslen, NULL, 0) < 0) {
error("sysctl getting mbtypes size failed");
@ -170,15 +188,28 @@ initmbufs()
return 0;
}
nmbtypes = mbtypeslen / sizeof(*m_mbtypes);
len = 0;
if (sysctlbyname("kern.ipc.mbstat", 0, &len, 0, 0) < 0) {
error("sysctl getting mbstat size failed");
#endif
len = sizeof(int);
if (sysctlbyname("kern.smp.cpus", &ncpu, &len, NULL, 0) < 0) {
error("sysctl getting number of cpus");
return 0;
}
if (sysctlbyname("kern.ipc.mb_statpcpu", NULL, &len, NULL, 0) < 0) {
error("sysctl getting mbpstat total size failed");
return 0;
}
num_objs = (int)(len / sizeof(struct mbpstat));
if ((mbpstat = calloc(num_objs, sizeof(struct mbpstat *))) == NULL) {
error("calloc mbpstat pointers failed");
return 0;
}
if ((mbpstat[0] = calloc(num_objs, sizeof(struct mbpstat))) == NULL) {
error("calloc mbpstat structures failed");
return 0;
}
for (i = 0; i < num_objs; i++)
mbpstat[i] = mbpstat[0] + i;
if (mb == 0)
mb = (struct mbstat *)calloc(1, sizeof *mb);
return 1;
}
@ -187,11 +218,12 @@ fetchmbufs()
{
size_t len;
len = sizeof *mb;
if (sysctlbyname("kern.ipc.mbstat", mb, &len, 0, 0) < 0)
printw("sysctl: mbstat: %s", strerror(errno));
len = num_objs * sizeof(struct mbpstat);
if (sysctlbyname("kern.ipc.mb_statpcpu", mbpstat[0], &len, NULL, 0) < 0)
printw("sysctl: mbpstat: %s", strerror(errno));
#if 0
len = nmbtypes * sizeof *m_mbtypes;
if (sysctlbyname("kern.ipc.mbtypes", m_mbtypes, &len, 0, 0) < 0)
printw("sysctl: mbtypes: %s", strerror(errno));
#endif
}