Big mbuf subsystem diff #1: incorporate mutexes and fix things up somewhat
to accommodate the changes.

 Here's a list of things that have changed (I may have left out a few); for a
 relatively complete list, see http://people.freebsd.org/~bmilekic/mtx_journal

   * Remove old (once useful) mcluster code for MCLBYTES > PAGE_SIZE which
     nobody uses anymore. It was great while it lasted, but now we're moving
     onto bigger and better things (Approved by: wollman).

   * Practically rewrote the allocation macros in sys/sys/mbuf.h to accommodate
     the new allocations, which grab the necessary lock (a condensed sketch of
     the resulting pattern follows this list).

   * Make sure that necessary mbstat variables are manipulated with
     corresponding atomic() routines.

   * Changed the "wait" routines: cleaned them up and made one routine that
     does the job.

   * Generalized the MBWAKEUP() macro. Got rid of m_retry and m_retryhdr, as
     they are now folded into the generalized "wait" routines.

   * Sleep routines now use msleep().

   * Free lists have locks.

   * etc... probably other stuff I'm missing...
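
   To make the locking pattern concrete, here is a condensed sketch of what
   the reworked allocation path looks like (boiled down from the _MGET() /
   m_mballoc_wait() changes in the diff below; mb_alloc_sketch is a made-up
   name, and details such as the mbtypes[] accounting and exactly where the
   drop counter is bumped differ in the real macros):

      /*
       * Each free list now lives behind its own mutex; allocation grabs
       * the lock, falls back to an msleep()-based wait for M_WAIT callers,
       * and updates statistics with atomic(9) operations.
       */
      struct mbffree_lst {
              struct mbuf *m_head;            /* head of the mbuf free list */
              struct mtx   m_mtx;             /* protects m_head */
      };
      extern struct mbffree_lst mmbfree;

      static struct mbuf *
      mb_alloc_sketch(int how)
      {
              struct mbuf *m;

              mtx_enter(&mmbfree.m_mtx, MTX_DEF);
              if (mmbfree.m_head == NULL)
                      (void)m_mballoc(1, how);        /* refill from mb_map */
              m = mmbfree.m_head;
              if (m != NULL)
                      mmbfree.m_head = m->m_next;
              else if (how == M_WAIT)
                      m = m_mballoc_wait();   /* msleep()s on &mmbfree.m_mtx */
              mtx_exit(&mmbfree.m_mtx, MTX_DEF);

              if (m == NULL)
                      atomic_add_long(&mbstat.m_drops, 1);
              return (m);
      }

   On the free side, MFREE() takes the same mutex and calls
   MBWAKEUP(m_mballoc_wid), which wakeup_one()s any sleeper waiting in
   m_mballoc_wait().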

  Things to look out for and work on later:

   * find a better way to (dynamically) adjust EXT_COUNTERS

   * remove the need for drain routines to recurse on a lock by providing a
     lock-free lower-level version of MFREE() (and possibly m_free()?); one
     possible shape is sketched after this list.

   * check out the include of mutex.h in sys/sys/mbuf.h - probably violating
     general philosophy here.
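
   For the MFREE() item above, one possible shape (not part of this diff, and
   the macro names below are hypothetical) is to split the free path into a
   helper that assumes the caller already holds mmbfree.m_mtx, plus a locking
   wrapper, so a drain routine invoked with the lock held can free mbufs
   without recursing on it:

      /* Caller must hold mmbfree.m_mtx; m_ext handling omitted for brevity. */
      #define _MFREE_LOCKED(m, n) do {                                \
              struct mbuf *_mm = (m);                                 \
                                                                      \
              mbtypes[_mm->m_type]--;                                 \
              _mm->m_type = MT_FREE;                                  \
              mbtypes[MT_FREE]++;                                     \
              (n) = _mm->m_next;                                      \
              _mm->m_next = mmbfree.m_head;                           \
              mmbfree.m_head = _mm;                                   \
              MBWAKEUP(m_mballoc_wid);                                \
      } while (0)

      /* The exported MFREE() would then just wrap the helper with the lock. */
      #define MFREE_SKETCH(m, n) do {                                 \
              mtx_enter(&mmbfree.m_mtx, MTX_DEF);                     \
              _MFREE_LOCKED((m), (n));                                \
              mtx_exit(&mmbfree.m_mtx, MTX_DEF);                      \
      } while (0)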

   The code has been reviewed quite a bit, but problems may arise... please
   don't panic! Send me email: bmilekic@freebsd.org

Reviewed by: jlemon, cp, alfred, others?
bmilekic 2000-09-30 06:30:39 +00:00
parent 25fc889685
commit 73f1784807
8 changed files with 398 additions and 479 deletions

View File

@ -185,8 +185,6 @@ SYSCTL_INT(_machdep, CPU_UNALIGNED_SIGBUS, unaligned_sigbus,
static void cpu_startup __P((void *));
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");
struct msgbuf *msgbufp=0;
int bootverbose = 0, Maxmem = 0;
@ -373,18 +371,9 @@ cpu_startup(dummy)
(16*(ARG_MAX+(PAGE_SIZE*3))));
/*
* Finally, allocate mbuf pool.
* XXX: Mbuf system machine-specific initializations should
* go here, if anywhere.
*/
{
vm_offset_t mb_map_size;
mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES +
(nmbclusters + nmbufs / 4) * sizeof(union mext_refcnt);
mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE));
mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl,
&maxaddr, mb_map_size);
mb_map->system_map = 1;
}
/*
* Initialize callouts

View File

@ -139,8 +139,6 @@ extern void initializecpu(void);
static void cpu_startup __P((void *));
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");
int _udatasel, _ucodesel;
u_int atdevbase;
@ -399,18 +397,9 @@ cpu_startup(dummy)
(16*(ARG_MAX+(PAGE_SIZE*3))));
/*
* Finally, allocate mbuf pool.
* XXX: Mbuf system machine-specific initializations should
* go here, if anywhere.
*/
{
vm_offset_t mb_map_size;
mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES +
(nmbclusters + nmbufs / 4) * sizeof(union mext_refcnt);
mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE));
mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl,
&maxaddr, mb_map_size);
mb_map->system_map = 1;
}
/*
* Initialize callouts

View File

@ -139,8 +139,6 @@ extern void initializecpu(void);
static void cpu_startup __P((void *));
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");
int _udatasel, _ucodesel;
u_int atdevbase;
@ -399,18 +397,9 @@ cpu_startup(dummy)
(16*(ARG_MAX+(PAGE_SIZE*3))));
/*
* Finally, allocate mbuf pool.
* XXX: Mbuf system machine-specific initializations should
* go here, if anywhere.
*/
{
vm_offset_t mb_map_size;
mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES +
(nmbclusters + nmbufs / 4) * sizeof(union mext_refcnt);
mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE));
mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl,
&maxaddr, mb_map_size);
mb_map->system_map = 1;
}
/*
* Initialize callouts

View File

@ -43,14 +43,10 @@
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#ifdef INVARIANTS
#include <machine/cpu.h>
#endif
#include <machine/mutex.h>
static void mbinit __P((void *));
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)
@ -58,18 +54,26 @@ SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)
struct mbuf *mbutl;
struct mbstat mbstat;
u_long mbtypes[MT_NTYPES];
struct mbuf *mmbfree;
union mcluster *mclfree;
union mext_refcnt *mext_refcnt_free;
int max_linkhdr;
int max_protohdr;
int max_hdr;
int max_datalen;
int nmbclusters;
int nmbufs;
u_int m_mballoc_wid = 0;
u_int m_clalloc_wid = 0;
u_long m_mballoc_wid = 0;
u_long m_clalloc_wid = 0;
/*
* freelist header structures...
* mbffree_lst, mclfree_lst, mcntfree_lst
*/
struct mbffree_lst mmbfree;
struct mclfree_lst mclfree;
struct mcntfree_lst mcntfree;
/*
* sysctl(8) exported objects
*/
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
&max_linkhdr, 0, "");
@ -95,41 +99,70 @@ TUNABLE_INT_DECL("kern.ipc.nmbufs", NMBCLUSTERS * 4, nmbufs);
static void m_reclaim __P((void));
/* Initial allocation numbers */
#define NCL_INIT 2
#define NMB_INIT 16
#define REF_INIT (NMBCLUSTERS * 2)
#define REF_INIT NMBCLUSTERS
/* ARGSUSED*/
/*
* Full mbuf subsystem initialization done here.
*
* XXX: If ever we have system specific map setups to do, then move them to
* machdep.c - for now, there is no reason for this stuff to go there.
*/
static void
mbinit(dummy)
void *dummy;
{
int s;
vm_offset_t maxaddr, mb_map_size;
mmbfree = NULL;
mclfree = NULL;
mext_refcnt_free = NULL;
/*
* Setup the mb_map, allocate requested VM space.
*/
mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES + EXT_COUNTERS
* sizeof(union mext_refcnt);
mb_map_size = roundup2(mb_map_size, PAGE_SIZE);
mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
mb_map_size);
/* XXX: mb_map->system_map = 1; */
/*
* Initialize the free list headers, and setup locks for lists.
*/
mmbfree.m_head = NULL;
mclfree.m_head = NULL;
mcntfree.m_head = NULL;
mtx_init(&mmbfree.m_mtx, "mbuf free list lock", MTX_DEF);
mtx_init(&mclfree.m_mtx, "mcluster free list lock", MTX_DEF);
mtx_init(&mcntfree.m_mtx, "m_ext counter free list lock", MTX_DEF);
/*
* Initialize mbuf subsystem (sysctl exported) statistics structure.
*/
mbstat.m_msize = MSIZE;
mbstat.m_mclbytes = MCLBYTES;
mbstat.m_minclsize = MINCLSIZE;
mbstat.m_mlen = MLEN;
mbstat.m_mhlen = MHLEN;
s = splimp();
if (m_alloc_ref(REF_INIT) == 0)
/*
* Perform some initial allocations.
*/
mtx_enter(&mcntfree.m_mtx, MTX_DEF);
if (m_alloc_ref(REF_INIT, M_DONTWAIT) == 0)
goto bad;
mtx_exit(&mcntfree.m_mtx, MTX_DEF);
mtx_enter(&mmbfree.m_mtx, MTX_DEF);
if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0)
goto bad;
#if MCLBYTES <= PAGE_SIZE
mtx_exit(&mmbfree.m_mtx, MTX_DEF);
mtx_enter(&mclfree.m_mtx, MTX_DEF);
if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
goto bad;
#else
/* It's OK to call contigmalloc in this context. */
if (m_clalloc(16, M_WAIT) == 0)
goto bad;
#endif
splx(s);
mtx_exit(&mclfree.m_mtx, MTX_DEF);
return;
bad:
panic("mbinit: failed to initialize mbuf subsystem!");
@ -138,37 +171,55 @@ mbinit(dummy)
/*
* Allocate at least nmb reference count structs and place them
* on the ref cnt free list.
* Must be called at splimp.
*
* Must be called with the mcntfree lock held.
*/
int
m_alloc_ref(nmb)
m_alloc_ref(nmb, how)
u_int nmb;
int how;
{
caddr_t p;
u_int nbytes;
int i;
/*
* XXX:
* We don't cap the amount of memory that can be used
* by the reference counters, like we do for mbufs and
* mbuf clusters. The reason is that we don't really expect
* to have to be allocating too many of these guys with m_alloc_ref(),
* and if we are, we're probably not out of the woods anyway,
* so leave this way for now.
* mbuf clusters. In fact, we're absolutely sure that we
* won't ever be going over our allocated space. We keep enough
* space in mb_map to accomodate maximum values of allocatable
* external buffers including, but not limited to, clusters.
* (That's also why we won't have to have wait routines for
* counters).
*
* If we're in here, we're absolutely certain to be returning
* succesfully, as long as there is physical memory to accomodate
* us. And if there isn't, but we're willing to wait, then
* kmem_malloc() will do the only waiting needed.
*/
if (mb_map_full)
return (0);
nbytes = round_page(nmb * sizeof(union mext_refcnt));
if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, M_NOWAIT)) == NULL)
mtx_exit(&mcntfree.m_mtx, MTX_DEF);
mtx_enter(&Giant, MTX_DEF);
if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_WAIT ? M_WAIT :
M_NOWAIT)) == NULL) {
mtx_exit(&Giant, MTX_DEF);
mtx_enter(&mcntfree.m_mtx, MTX_DEF); /* XXX: We must be holding
it going out. */
return (0);
}
mtx_exit(&Giant, MTX_DEF);
nmb = nbytes / sizeof(union mext_refcnt);
/*
* We don't let go of the mutex in order to avoid a race.
* It is up to the caller to let go of the mutex.
*/
mtx_enter(&mcntfree.m_mtx, MTX_DEF);
for (i = 0; i < nmb; i++) {
((union mext_refcnt *)p)->next_ref = mext_refcnt_free;
mext_refcnt_free = (union mext_refcnt *)p;
((union mext_refcnt *)p)->next_ref = mcntfree.m_head;
mcntfree.m_head = (union mext_refcnt *)p;
p += sizeof(union mext_refcnt);
mbstat.m_refree++;
}
@ -179,9 +230,9 @@ m_alloc_ref(nmb)
/*
* Allocate at least nmb mbufs and place on mbuf free list.
* Must be called at splimp.
*
* Must be called with the mmbfree lock held.
*/
/* ARGSUSED */
int
m_mballoc(nmb, how)
register int nmb;
@ -192,44 +243,52 @@ m_mballoc(nmb, how)
int nbytes;
/*
* If we've hit the mbuf limit, stop allocating from mb_map,
* (or trying to) in order to avoid dipping into the section of
* mb_map which we've "reserved" for clusters.
* If we've hit the mbuf limit, stop allocating from mb_map.
* Also, once we run out of map space, it will be impossible to
* get any more (nothing is ever freed back to the map).
*/
if ((nmb + mbstat.m_mbufs) > nmbufs)
if (mb_map_full || ((nmb + mbstat.m_mbufs) > nmbufs)) {
/*
* Needs to be atomic as we may be incrementing it
* while holding another mutex, like mclfree. In other
* words, m_drops is not reserved solely for mbufs,
* but is also available for clusters.
*/
atomic_add_long(&mbstat.m_drops, 1);
return (0);
/*
* Once we run out of map space, it will be impossible to get
* any more (nothing is ever freed back to the map)
* -- however you are not dead as m_reclaim might
* still be able to free a substantial amount of space.
*
* XXX Furthermore, we can also work with "recycled" mbufs (when
* we're calling with M_WAIT the sleep procedure will be woken
* up when an mbuf is freed. See m_mballoc_wait()).
*/
if (mb_map_full)
return (0);
nbytes = round_page(nmb * MSIZE);
p = (caddr_t)kmem_malloc(mb_map, nbytes, M_NOWAIT);
if (p == 0 && how == M_WAIT) {
mbstat.m_wait++;
p = (caddr_t)kmem_malloc(mb_map, nbytes, M_WAITOK);
}
nbytes = round_page(nmb * MSIZE);
/* XXX: The letting go of the mmbfree lock here may eventually
be moved to only be done for M_WAIT calls to kmem_malloc() */
mtx_exit(&mmbfree.m_mtx, MTX_DEF);
mtx_enter(&Giant, MTX_DEF);
p = (caddr_t)kmem_malloc(mb_map, nbytes, M_NOWAIT);
if (p == 0 && how == M_WAIT) {
atomic_add_long(&mbstat.m_wait, 1);
p = (caddr_t)kmem_malloc(mb_map, nbytes, M_WAITOK);
}
mtx_exit(&Giant, MTX_DEF);
mtx_enter(&mmbfree.m_mtx, MTX_DEF);
/*
* Either the map is now full, or `how' is M_NOWAIT and there
* Either the map is now full, or `how' is M_DONTWAIT and there
* are no pages left.
*/
if (p == NULL)
return (0);
nmb = nbytes / MSIZE;
/*
* We don't let go of the mutex in order to avoid a race.
* It is up to the caller to let go of the mutex when done
* with grabbing the mbuf from the free list.
*/
for (i = 0; i < nmb; i++) {
((struct mbuf *)p)->m_next = mmbfree;
mmbfree = (struct mbuf *)p;
((struct mbuf *)p)->m_next = mmbfree.m_head;
mmbfree.m_head = (struct mbuf *)p;
p += MSIZE;
}
mbstat.m_mbufs += nmb;
@ -240,85 +299,65 @@ m_mballoc(nmb, how)
/*
* Once the mb_map has been exhausted and if the call to the allocation macros
* (or, in some cases, functions) is with M_WAIT, then it is necessary to rely
* solely on reclaimed mbufs. Here we wait for an mbuf to be freed for a
* solely on reclaimed mbufs.
*
* Here we request for the protocols to free up some resources and, if we
* still cannot get anything, then we wait for an mbuf to be freed for a
* designated (mbuf_wait) time.
*
* Must be called with the mmbfree mutex held, and we will probably end
* up recursing into that lock from some of the drain routines, but
* this should be okay, as long as we don't block there, or attempt
* to allocate from them (theoretically impossible).
*/
struct mbuf *
m_mballoc_wait(int caller, int type)
m_mballoc_wait(void)
{
struct mbuf *p;
int s;
s = splimp();
m_mballoc_wid++;
if ((tsleep(&m_mballoc_wid, PVM, "mballc", mbuf_wait)) == EWOULDBLOCK)
m_mballoc_wid--;
splx(s);
struct mbuf *p = NULL;
/*
* Now that we (think) that we've got something, we will redo an
* MGET, but avoid getting into another instance of m_mballoc_wait()
* XXX: We retry to fetch _even_ if the sleep timed out. This is left
* this way, purposely, in the [unlikely] case that an mbuf was
* freed but the sleep was not awakened in time.
* See if we can drain some resources out of the protocols.
*/
p = NULL;
switch (caller) {
case MGET_C:
MGET(p, M_DONTWAIT, type);
break;
case MGETHDR_C:
MGETHDR(p, M_DONTWAIT, type);
break;
default:
panic("m_mballoc_wait: invalid caller (%d)", caller);
m_reclaim();
_MGET(p, M_DONTWAIT);
if (p == NULL) {
m_mballoc_wid++;
if (msleep(&m_mballoc_wid, &mmbfree.m_mtx, PVM, "mballc",
mbuf_wait) == EWOULDBLOCK)
m_mballoc_wid--;
/*
* Try again (one last time).
*
* We retry to fetch _even_ if the sleep timed out. This
* is left this way, purposely, in the [unlikely] case
* that an mbuf was freed but the sleep was not awoken
* in time.
*
* If the sleep didn't time out (i.e. we got woken up) then
* we have the lock so we just grab an mbuf, hopefully.
*/
_MGET(p, M_DONTWAIT);
}
s = splimp();
if (p != NULL) { /* We waited and got something... */
mbstat.m_wait++;
/* Wake up another if we have more free. */
if (mmbfree != NULL)
MMBWAKEUP();
}
splx(s);
/* If we waited and got something... */
if (p != NULL) {
atomic_add_long(&mbstat.m_wait, 1);
if (mmbfree.m_head != NULL)
MBWAKEUP(m_mballoc_wid);
} else
atomic_add_long(&mbstat.m_drops, 1);
return (p);
}
#if MCLBYTES > PAGE_SIZE
static int i_want_my_mcl;
static void
kproc_mclalloc(void)
{
int status;
while (1) {
tsleep(&i_want_my_mcl, PVM, "mclalloc", 0);
for (; i_want_my_mcl; i_want_my_mcl--) {
if (m_clalloc(1, M_WAIT) == 0)
printf("m_clalloc failed even in process context!\n");
}
}
}
static struct proc *mclallocproc;
static struct kproc_desc mclalloc_kp = {
"mclalloc",
kproc_mclalloc,
&mclallocproc
};
SYSINIT(mclallocproc, SI_SUB_KTHREAD_UPDATE, SI_ORDER_ANY, kproc_start,
&mclalloc_kp);
#endif
/*
* Allocate some number of mbuf clusters
* and place on cluster free list.
* Must be called at splimp.
*
* Must be called with the mclfree lock held.
*/
/* ARGSUSED */
int
m_clalloc(ncl, how)
register int ncl;
@ -329,54 +368,39 @@ m_clalloc(ncl, how)
int npg;
/*
* If the map is now full (nothing will ever be freed to it).
* If we've hit the mcluster number limit, stop allocating from
* mb_map, (or trying to) in order to avoid dipping into the section
* of mb_map which we've "reserved" for mbufs.
* mb_map.
*/
if ((ncl + mbstat.m_clusters) > nmbclusters) {
mbstat.m_drops++;
if (mb_map_full || ((ncl + mbstat.m_clusters) > nmbclusters)) {
atomic_add_long(&mbstat.m_drops, 1);
return (0);
}
/*
* Once we run out of map space, it will be impossible
* to get any more (nothing is ever freed back to the
* map). From this point on, we solely rely on freed
* mclusters.
*/
if (mb_map_full) {
mbstat.m_drops++;
return (0);
}
#if MCLBYTES > PAGE_SIZE
if (how != M_WAIT) {
i_want_my_mcl += ncl;
wakeup(&i_want_my_mcl);
mbstat.m_wait++;
p = 0;
} else {
p = contigmalloc1(MCLBYTES * ncl, M_DEVBUF, M_WAITOK, 0ul,
~0ul, PAGE_SIZE, 0, mb_map);
}
#else
npg = ncl;
mtx_exit(&mclfree.m_mtx, MTX_DEF);
mtx_enter(&Giant, MTX_DEF);
p = (caddr_t)kmem_malloc(mb_map, ctob(npg),
how != M_WAIT ? M_NOWAIT : M_WAITOK);
mtx_exit(&Giant, MTX_DEF);
ncl = ncl * PAGE_SIZE / MCLBYTES;
#endif
mtx_enter(&mclfree.m_mtx, MTX_DEF);
/*
* Either the map is now full, or `how' is M_NOWAIT and there
* Either the map is now full, or `how' is M_DONTWAIT and there
* are no pages left.
*/
if (p == NULL) {
mbstat.m_drops++;
atomic_add_long(&mbstat.m_drops, 1);
return (0);
}
/*
* We don't let go of the mutex in order to avoid a race.
*/
for (i = 0; i < ncl; i++) {
((union mcluster *)p)->mcl_next = mclfree;
mclfree = (union mcluster *)p;
((union mcluster *)p)->mcl_next = mclfree.m_head;
mclfree.m_head = (union mcluster *)p;
p += MCLBYTES;
mbstat.m_clfree++;
}
@ -386,131 +410,60 @@ m_clalloc(ncl, how)
/*
* Once the mb_map submap has been exhausted and the allocation is called with
* M_WAIT, we rely on the mclfree union pointers. If nothing is free, we will
* M_WAIT, we rely on the mclfree list. If nothing is free, we will
* sleep for a designated amount of time (mbuf_wait) or until we're woken up
* due to sudden mcluster availability.
*
* Must be called with the mclfree lock held.
*/
caddr_t
m_clalloc_wait(void)
{
caddr_t p;
int s;
caddr_t p = NULL;
#ifdef __i386__
/* If in interrupt context, and INVARIANTS, maintain sanity and die. */
KASSERT(intr_nesting_level == 0, ("CLALLOC: CANNOT WAIT IN INTERRUPT"));
#endif
/* Sleep until something's available or until we expire. */
m_clalloc_wid++;
if ((tsleep(&m_clalloc_wid, PVM, "mclalc", mbuf_wait)) == EWOULDBLOCK)
if (msleep(&m_clalloc_wid, &mclfree.m_mtx, PVM, "mclalc", mbuf_wait)
== EWOULDBLOCK)
m_clalloc_wid--;
/*
* Now that we (think) that we've got something, we will redo and
* MGET, but avoid getting into another instance of m_clalloc_wait()
* Now that we (think) that we've got something, try again.
*/
p = NULL;
_MCLALLOC(p, M_DONTWAIT);
s = splimp();
if (p != NULL) { /* We waited and got something... */
mbstat.m_wait++;
/* Wake up another if we have more free. */
if (mclfree != NULL)
MCLWAKEUP();
}
/* If we waited and got something ... */
if (p != NULL) {
atomic_add_long(&mbstat.m_wait, 1);
if (mclfree.m_head != NULL)
MBWAKEUP(m_clalloc_wid);
} else
atomic_add_long(&mbstat.m_drops, 1);
splx(s);
return (p);
}
/*
* When MGET fails, ask protocols to free space when short of memory,
* then re-attempt to allocate an mbuf.
* m_reclaim: drain protocols in hopes to free up some resources...
*
* Should be called with mmbfree.m_mtx mutex held. We will most likely
* recursively grab it from within some drain routines, but that's okay,
* as the mutex will never be completely released until we let go of it
* after our m_reclaim() is over.
*
* Note: Drain routines are only allowed to free mbufs (and mclusters,
* as a consequence, if need be). They are not allowed to allocate
* new ones (that would defeat the purpose, anyway).
*/
struct mbuf *
m_retry(i, t)
int i, t;
{
register struct mbuf *m;
/*
* Must only do the reclaim if not in an interrupt context.
*/
if (i == M_WAIT) {
#ifdef __i386__
KASSERT(intr_nesting_level == 0,
("MBALLOC: CANNOT WAIT IN INTERRUPT"));
#endif
m_reclaim();
}
/*
* Both m_mballoc_wait and m_retry must be nulled because
* when the MGET macro is run from here, we deffinately do _not_
* want to enter an instance of m_mballoc_wait() or m_retry() (again!)
*/
#define m_mballoc_wait(caller,type) (struct mbuf *)0
#define m_retry(i, t) (struct mbuf *)0
MGET(m, i, t);
#undef m_retry
#undef m_mballoc_wait
if (m != NULL)
mbstat.m_wait++;
else
mbstat.m_drops++;
return (m);
}
/*
* As above; retry an MGETHDR.
*/
struct mbuf *
m_retryhdr(i, t)
int i, t;
{
register struct mbuf *m;
/*
* Must only do the reclaim if not in an interrupt context.
*/
if (i == M_WAIT) {
#ifdef __i386__
KASSERT(intr_nesting_level == 0,
("MBALLOC: CANNOT WAIT IN INTERRUPT"));
#endif
m_reclaim();
}
#define m_mballoc_wait(caller,type) (struct mbuf *)0
#define m_retryhdr(i, t) (struct mbuf *)0
MGETHDR(m, i, t);
#undef m_retryhdr
#undef m_mballoc_wait
if (m != NULL)
mbstat.m_wait++;
else
mbstat.m_drops++;
return (m);
}
static void
m_reclaim()
{
register struct domain *dp;
register struct protosw *pr;
int s = splimp();
for (dp = domains; dp; dp = dp->dom_next)
for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
if (pr->pr_drain)
(*pr->pr_drain)();
splx(s);
mbstat.m_drain++;
}
@ -685,11 +638,11 @@ m_copym(m, off0, len, wait)
np = &n->m_next;
}
if (top == 0)
MCFail++;
atomic_add_long(&MCFail, 1);
return (top);
nospace:
m_freem(top);
MCFail++;
atomic_add_long(&MCFail, 1);
return (0);
}
@ -746,7 +699,7 @@ m_copypacket(m, how)
return top;
nospace:
m_freem(top);
MCFail++;
atomic_add_long(&MCFail, 1);
return 0;
}
@ -853,7 +806,7 @@ m_dup(m, how)
nospace:
m_freem(top);
MCFail++;
atomic_add_long(&MCFail, 1);
return (0);
}
@ -1022,7 +975,7 @@ m_pullup(n, len)
return (m);
bad:
m_freem(n);
MPFail++;
atomic_add_long(&MPFail, 1);
return (0);
}

View File

@ -145,8 +145,6 @@ extern void initializecpu(void);
static void cpu_startup __P((void *));
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");
#ifdef PC98
int need_pre_dma_flush; /* If 1, use wbinvd befor DMA transfer. */
int need_post_dma_flush; /* If 1, use invd after DMA transfer. */
@ -413,18 +411,9 @@ cpu_startup(dummy)
(16*(ARG_MAX+(PAGE_SIZE*3))));
/*
* Finally, allocate mbuf pool.
* XXX: Mbuf system machine-specific initializations should
* go here, if anywhere.
*/
{
vm_offset_t mb_map_size;
mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES +
(nmbclusters + nmbufs / 4) * sizeof(union mext_refcnt);
mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE));
mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl,
&maxaddr, mb_map_size);
mb_map->system_map = 1;
}
/*
* Initialize callouts

View File

@ -145,8 +145,6 @@ extern void initializecpu(void);
static void cpu_startup __P((void *));
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");
#ifdef PC98
int need_pre_dma_flush; /* If 1, use wbinvd befor DMA transfer. */
int need_post_dma_flush; /* If 1, use invd after DMA transfer. */
@ -413,18 +411,9 @@ cpu_startup(dummy)
(16*(ARG_MAX+(PAGE_SIZE*3))));
/*
* Finally, allocate mbuf pool.
* XXX: Mbuf system machine-specific initializations should
* go here, if anywhere.
*/
{
vm_offset_t mb_map_size;
mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES +
(nmbclusters + nmbufs / 4) * sizeof(union mext_refcnt);
mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE));
mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl,
&maxaddr, mb_map_size);
mb_map->system_map = 1;
}
/*
* Initialize callouts

View File

@ -120,12 +120,12 @@ enum sysinit_sub_id {
SI_SUB_RUN_QUEUE = 0x2400000, /* set up run queue*/
SI_SUB_CREATE_INIT = 0x2500000, /* create init process*/
SI_SUB_SCHED_IDLE = 0x2600000, /* required idle procs */
SI_SUB_SOFTINTR = 0x2700000, /* start soft interrupt thread */
SI_SUB_MBUF = 0x2700000, /* mbuf subsystem */
SI_SUB_SOFTINTR = 0x2800000, /* start soft interrupt thread */
SI_SUB_DRIVERS = 0x3100000, /* Let Drivers initialize */
SI_SUB_CONFIGURE = 0x3800000, /* Configure devices */
SI_SUB_VFS = 0x4000000, /* virtual file system*/
SI_SUB_CLOCKS = 0x4800000, /* real time and stat clocks*/
SI_SUB_MBUF = 0x5000000, /* mbufs*/
SI_SUB_CLIST = 0x5800000, /* clists*/
SI_SUB_SYSV_SHM = 0x6400000, /* System V shared memory*/
SI_SUB_SYSV_SEM = 0x6800000, /* System V semaphores*/

View File

@ -37,6 +37,8 @@
#ifndef _SYS_MBUF_H_
#define _SYS_MBUF_H_
#include <machine/mutex.h> /* XXX */
/*
* Mbufs are of a single size, MSIZE (machine/param.h), which
* includes overhead. An mbuf may add a single "mbuf cluster" of size
@ -51,6 +53,22 @@
#define MINCLSIZE (MHLEN + 1) /* smallest amount to put in cluster */
#define M_MAXCOMPRESS (MHLEN / 2) /* max amount to copy for compression */
/*
* Maximum number of allocatable counters for external buffers. This
* ensures enough VM address space for the allocation of counters
* in the extreme case where all possible external buffers are allocated.
*
* Note: When new types of external storage are allocated, EXT_COUNTERS
* must be tuned accordingly. Practically, this isn't a big deal
* as each counter is only a word long, so we can fit
* (PAGE_SIZE / length of word) counters in a single page.
*
* XXX: Must increase this if using any of if_ti, if_wb, if_sk drivers,
* or any other drivers which may manage their own buffers and
* eventually attach them to mbufs.
*/
#define EXT_COUNTERS (nmbclusters + nsfbufs)
/*
* Macros for type conversion
* mtod(m, t) - convert mbuf pointer to data pointer of correct type
@ -181,11 +199,11 @@ struct mbuf {
* mbuf statistics
*/
struct mbstat {
u_long m_mbufs; /* mbufs obtained from page pool */
u_long m_clusters; /* clusters obtained from page pool */
u_long m_clfree; /* free clusters */
u_long m_refcnt; /* refcnt structs obtained from page pool */
u_long m_refree; /* free refcnt structs */
u_long m_mbufs; /* # mbufs obtained from page pool */
u_long m_clusters; /* # clusters obtained from page pool */
u_long m_clfree; /* # clusters on freelist (cache) */
u_long m_refcnt; /* # ref counters obtained from page pool */
u_long m_refree; /* # ref counters on freelist (cache) */
u_long m_spare; /* spare field */
u_long m_drops; /* times failed to find space */
u_long m_wait; /* times waited for space */
@ -203,8 +221,7 @@ struct mbstat {
#define M_DONTWAIT 1
#define M_WAIT 0
/* Freelists:
*
/*
* Normal mbuf clusters are normally treated as character arrays
* after allocation, but use the first word of the buffer as a free list
* pointer while on the free list.
@ -214,15 +231,6 @@ union mcluster {
char mcl_buf[MCLBYTES];
};
/*
* These are identifying numbers passed to the m_mballoc_wait function,
* allowing us to determine whether the call came from an MGETHDR or
* an MGET.
*/
#define MGETHDR_C 1
#define MGET_C 2
/*
* The m_ext object reference counter structure.
*/
@ -232,41 +240,34 @@ union mext_refcnt {
};
/*
* Wake up the next instance (if any) of m_mballoc_wait() which is
* waiting for an mbuf to be freed. This should be called at splimp().
*
* XXX: If there is another free mbuf, this routine will be called [again]
* from the m_mballoc_wait routine in order to wake another sleep instance.
* free list header definitions: mbffree_lst, mclfree_lst, mcntfree_lst
*/
#define MMBWAKEUP() do { \
if (m_mballoc_wid) { \
m_mballoc_wid--; \
wakeup_one(&m_mballoc_wid); \
} \
} while (0)
struct mbffree_lst {
struct mbuf *m_head;
struct mtx m_mtx;
};
struct mclfree_lst {
union mcluster *m_head;
struct mtx m_mtx;
};
struct mcntfree_lst {
union mext_refcnt *m_head;
struct mtx m_mtx;
};
/*
* Same as above, but for mbuf cluster(s).
*/
#define MCLWAKEUP() do { \
if (m_clalloc_wid) { \
m_clalloc_wid--; \
wakeup_one(&m_clalloc_wid); \
} \
} while (0)
/*
* mbuf utility macros:
* Wake up the next instance (if any) of a sleeping allocation - which is
* waiting for a {cluster, mbuf} to be freed.
*
* MBUFLOCK(code)
* prevents a section of code from from being interrupted by network
* drivers.
* Must be called with the appropriate mutex held.
*/
#define MBUFLOCK(code) do { \
int _ms = splimp(); \
\
{ code } \
splx(_ms); \
#define MBWAKEUP(m_wid) do { \
if ((m_wid)) { \
m_wid--; \
wakeup_one(&(m_wid)); \
} \
} while (0)
/*
@ -286,31 +287,38 @@ union mext_refcnt {
#define MEXT_ADD_REF(m) atomic_add_long(&((m)->m_ext.ref_cnt->refcnt), 1)
#define _MEXT_ALLOC_CNT(m_cnt) MBUFLOCK( \
#define _MEXT_ALLOC_CNT(m_cnt, how) do { \
union mext_refcnt *__mcnt; \
\
if ((mext_refcnt_free == NULL) && (m_alloc_ref(1) == 0)) \
panic("mbuf subsystem: out of ref counts!"); \
__mcnt = mext_refcnt_free; \
mext_refcnt_free = __mcnt->next_ref; \
__mcnt->refcnt = 0; \
mtx_enter(&mcntfree.m_mtx, MTX_DEF); \
if (mcntfree.m_head == NULL) \
m_alloc_ref(1, (how)); \
__mcnt = mcntfree.m_head; \
if (__mcnt != NULL) { \
mcntfree.m_head = __mcnt->next_ref; \
mbstat.m_refree--; \
__mcnt->refcnt = 0; \
} \
mtx_exit(&mcntfree.m_mtx, MTX_DEF); \
(m_cnt) = __mcnt; \
mbstat.m_refree--; \
)
} while (0)
#define _MEXT_DEALLOC_CNT(m_cnt) do { \
union mext_refcnt *__mcnt = (m_cnt); \
\
__mcnt->next_ref = mext_refcnt_free; \
mext_refcnt_free = __mcnt; \
mtx_enter(&mcntfree.m_mtx, MTX_DEF); \
__mcnt->next_ref = mcntfree.m_head; \
mcntfree.m_head = __mcnt; \
mbstat.m_refree++; \
mtx_exit(&mcntfree.m_mtx, MTX_DEF); \
} while (0)
#define MEXT_INIT_REF(m) do { \
#define MEXT_INIT_REF(m, how) do { \
struct mbuf *__mmm = (m); \
\
_MEXT_ALLOC_CNT(__mmm->m_ext.ref_cnt); \
MEXT_ADD_REF(__mmm); \
_MEXT_ALLOC_CNT(__mmm->m_ext.ref_cnt, (how)); \
if (__mmm != NULL) \
MEXT_ADD_REF(__mmm); \
} while (0)
/*
@ -323,62 +331,71 @@ union mext_refcnt {
* allocates an mbuf and initializes it to contain a packet header
* and internal data.
*/
#define MGET(m, how, type) do { \
/*
* Lower-level macros for MGET(HDR)... Not to be used outside the
* subsystem ("non-exportable" macro names are prepended with "_").
*/
#define _MGET_SETUP(m_set, m_set_type) do { \
(m_set)->m_type = (m_set_type); \
(m_set)->m_next = NULL; \
(m_set)->m_nextpkt = NULL; \
(m_set)->m_data = (m_set)->m_dat; \
(m_set)->m_flags = 0; \
} while (0)
#define _MGET(m_mget, m_get_how) do { \
if (mmbfree.m_head == NULL) \
m_mballoc(1, (m_get_how)); \
(m_mget) = mmbfree.m_head; \
if ((m_mget) != NULL) { \
mmbfree.m_head = (m_mget)->m_next; \
mbtypes[MT_FREE]--; \
} else { \
if ((m_get_how) == M_WAIT) \
(m_mget) = m_mballoc_wait(); \
} \
} while (0)
#define MGET(m, how, type) do { \
struct mbuf *_mm; \
int _mhow = (how); \
int _mtype = (type); \
int _ms = splimp(); \
\
if (mmbfree == NULL) \
(void)m_mballoc(1, _mhow); \
_mm = mmbfree; \
mtx_enter(&mmbfree.m_mtx, MTX_DEF); \
_MGET(_mm, _mhow); \
if (_mm != NULL) { \
mmbfree = _mm->m_next; \
mbtypes[MT_FREE]--; \
mbtypes[_mtype]++; \
splx(_ms); \
_mm->m_type = _mtype; \
_mm->m_next = NULL; \
_mm->m_nextpkt = NULL; \
_mm->m_data = _mm->m_dat; \
_mm->m_flags = 0; \
} else { \
splx(_ms); \
_mm = m_retry(_mhow, _mtype); \
if (_mm == NULL && _mhow == M_WAIT) \
_mm = m_mballoc_wait(MGET_C, _mtype); \
} \
mbtypes[_mtype]++; \
mtx_exit(&mmbfree.m_mtx, MTX_DEF); \
_MGET_SETUP(_mm, _mtype); \
} else \
mtx_exit(&mmbfree.m_mtx, MTX_DEF); \
(m) = _mm; \
} while (0)
#define MGETHDR(m, how, type) do { \
#define _MGETHDR_SETUP(m_set, m_set_type) do { \
(m_set)->m_type = (m_set_type); \
(m_set)->m_next = NULL; \
(m_set)->m_nextpkt = NULL; \
(m_set)->m_data = (m_set)->m_pktdat; \
(m_set)->m_flags = M_PKTHDR; \
(m_set)->m_pkthdr.rcvif = NULL; \
(m_set)->m_pkthdr.csum_flags = 0; \
(m_set)->m_pkthdr.aux = NULL; \
} while (0)
#define MGETHDR(m, how, type) do { \
struct mbuf *_mm; \
int _mhow = (how); \
int _mtype = (type); \
int _ms = splimp(); \
\
if (mmbfree == NULL) \
(void)m_mballoc(1, _mhow); \
_mm = mmbfree; \
mtx_enter(&mmbfree.m_mtx, MTX_DEF); \
_MGET(_mm, _mhow); \
if (_mm != NULL) { \
mmbfree = _mm->m_next; \
mbtypes[MT_FREE]--; \
mbtypes[_mtype]++; \
splx(_ms); \
_mm->m_type = _mtype; \
_mm->m_next = NULL; \
_mm->m_nextpkt = NULL; \
_mm->m_data = _mm->m_pktdat; \
_mm->m_flags = M_PKTHDR; \
_mm->m_pkthdr.rcvif = NULL; \
_mm->m_pkthdr.csum_flags = 0; \
_mm->m_pkthdr.aux = NULL; \
} else { \
splx(_ms); \
_mm = m_retryhdr(_mhow, _mtype); \
if (_mm == NULL && _mhow == M_WAIT) \
_mm = m_mballoc_wait(MGETHDR_C, _mtype); \
} \
mbtypes[_mtype]++; \
mtx_exit(&mmbfree.m_mtx, MTX_DEF); \
_MGETHDR_SETUP(_mm, _mtype); \
} else \
mtx_exit(&mmbfree.m_mtx, MTX_DEF); \
(m) = _mm; \
} while (0)
@ -393,17 +410,14 @@ union mext_refcnt {
#define _MCLALLOC(p, how) do { \
caddr_t _mp; \
int _mhow = (how); \
int _ms = splimp(); \
\
if (mclfree == NULL) \
(void)m_clalloc(1, _mhow); \
_mp = (caddr_t)mclfree; \
if (mclfree.m_head == NULL) \
m_clalloc(1, _mhow); \
_mp = (caddr_t)mclfree.m_head; \
if (_mp != NULL) { \
mbstat.m_clfree--; \
mclfree = ((union mcluster *)_mp)->mcl_next; \
splx(_ms); \
mclfree.m_head = ((union mcluster *)_mp)->mcl_next; \
} else { \
splx(_ms); \
if (_mhow == M_WAIT) \
_mp = m_clalloc_wait(); \
} \
@ -413,39 +427,50 @@ union mext_refcnt {
#define MCLGET(m, how) do { \
struct mbuf *_mm = (m); \
\
mtx_enter(&mclfree.m_mtx, MTX_DEF); \
_MCLALLOC(_mm->m_ext.ext_buf, (how)); \
mtx_exit(&mclfree.m_mtx, MTX_DEF); \
if (_mm->m_ext.ext_buf != NULL) { \
_mm->m_data = _mm->m_ext.ext_buf; \
_mm->m_flags |= M_EXT; \
_mm->m_ext.ext_free = NULL; \
_mm->m_ext.ext_args = NULL; \
_mm->m_ext.ext_size = MCLBYTES; \
MEXT_INIT_REF(_mm); \
MEXT_INIT_REF(_mm, (how)); \
if (_mm->m_ext.ref_cnt == NULL) { \
_MCLFREE(_mm->m_ext.ext_buf); \
_mm->m_ext.ext_buf = NULL; \
} else { \
_mm->m_data = _mm->m_ext.ext_buf; \
_mm->m_flags |= M_EXT; \
_mm->m_ext.ext_free = NULL; \
_mm->m_ext.ext_args = NULL; \
_mm->m_ext.ext_size = MCLBYTES; \
} \
} \
} while (0)
#define MEXTADD(m, buf, size, free, args) do { \
struct mbuf *_mm = (m); \
\
_mm->m_flags |= M_EXT; \
_mm->m_ext.ext_buf = (caddr_t)(buf); \
_mm->m_data = _mm->m_ext.ext_buf; \
_mm->m_ext.ext_size = (size); \
_mm->m_ext.ext_free = (free); \
_mm->m_ext.ext_args = (args); \
MEXT_INIT_REF(_mm); \
MEXT_INIT_REF(_mm, M_WAIT); \
if (_mm->m_ext.ref_cnt != NULL) { \
_mm->m_flags |= M_EXT; \
_mm->m_ext.ext_buf = (caddr_t)(buf); \
_mm->m_data = _mm->m_ext.ext_buf; \
_mm->m_ext.ext_size = (size); \
_mm->m_ext.ext_free = (free); \
_mm->m_ext.ext_args = (args); \
} \
} while (0)
#define _MCLFREE(p) MBUFLOCK( \
#define _MCLFREE(p) do { \
union mcluster *_mp = (union mcluster *)(p); \
\
_mp->mcl_next = mclfree; \
mclfree = _mp; \
mtx_enter(&mclfree.m_mtx, MTX_DEF); \
_mp->mcl_next = mclfree.m_head; \
mclfree.m_head = _mp; \
mbstat.m_clfree++; \
MCLWAKEUP(); \
)
MBWAKEUP(m_clalloc_wid); \
mtx_exit(&mclfree.m_mtx, MTX_DEF); \
} while (0)
#define _MEXTFREE(m) do { \
#define MEXTFREE(m) do { \
struct mbuf *_mmm = (m); \
\
if (MEXT_IS_REF(_mmm)) \
@ -461,29 +486,27 @@ union mext_refcnt {
_mmm->m_flags &= ~M_EXT; \
} while (0)
#define MEXTFREE(m) MBUFLOCK( \
_MEXTFREE(m); \
)
/*
* MFREE(struct mbuf *m, struct mbuf *n)
* Free a single mbuf and associated external storage.
* Place the successor, if any, in n.
*/
#define MFREE(m, n) MBUFLOCK( \
#define MFREE(m, n) do { \
struct mbuf *_mm = (m); \
\
KASSERT(_mm->m_type != MT_FREE, ("freeing free mbuf")); \
if (_mm->m_flags & M_EXT) \
_MEXTFREE(_mm); \
MEXTFREE(_mm); \
mtx_enter(&mmbfree.m_mtx, MTX_DEF); \
mbtypes[_mm->m_type]--; \
_mm->m_type = MT_FREE; \
mbtypes[MT_FREE]++; \
(n) = _mm->m_next; \
_mm->m_next = mmbfree; \
mmbfree = _mm; \
MMBWAKEUP(); \
)
_mm->m_next = mmbfree.m_head; \
mmbfree.m_head = _mm; \
MBWAKEUP(m_mballoc_wid); \
mtx_exit(&mmbfree.m_mtx, MTX_DEF); \
} while (0)
/*
* Copy mbuf pkthdr from "from" to "to".
@ -557,15 +580,15 @@ union mext_refcnt {
*_mmp = _mm; \
} while (0)
/* change mbuf to new type */
/*
* change mbuf to new type
*/
#define MCHTYPE(m, t) do { \
struct mbuf *_mm = (m); \
int _mt = (t); \
int _ms = splimp(); \
\
mbtypes[_mm->m_type]--; \
mbtypes[_mt]++; \
splx(_ms); \
atomic_subtract_long(mbtypes[_mm->m_type], 1); \
atomic_add_long(mbtypes[_mt], 1); \
_mm->m_type = (_mt); \
} while (0)
@ -584,8 +607,8 @@ struct mauxtag {
};
#ifdef _KERNEL
extern u_int m_clalloc_wid; /* mbuf cluster wait count */
extern u_int m_mballoc_wid; /* mbuf wait count */
extern u_long m_clalloc_wid; /* mbuf cluster wait count */
extern u_long m_mballoc_wid; /* mbuf wait count */
extern int max_linkhdr; /* largest link-level header */
extern int max_protohdr; /* largest protocol header */
extern int max_hdr; /* largest link+protocol header */
@ -594,15 +617,15 @@ extern struct mbstat mbstat;
extern u_long mbtypes[MT_NTYPES]; /* per-type mbuf allocations */
extern int mbuf_wait; /* mbuf sleep time */
extern struct mbuf *mbutl; /* virtual address of mclusters */
extern union mcluster *mclfree;
extern struct mbuf *mmbfree;
extern union mext_refcnt *mext_refcnt_free;
extern struct mclfree_lst mclfree;
extern struct mbffree_lst mmbfree;
extern struct mcntfree_lst mcntfree;
extern int nmbclusters;
extern int nmbufs;
extern int nsfbufs;
void m_adj __P((struct mbuf *, int));
int m_alloc_ref __P((u_int));
int m_alloc_ref __P((u_int, int));
void m_cat __P((struct mbuf *,struct mbuf *));
int m_clalloc __P((int, int));
caddr_t m_clalloc_wait __P((void));
@ -619,13 +642,11 @@ struct mbuf *m_get __P((int, int));
struct mbuf *m_getclr __P((int, int));
struct mbuf *m_gethdr __P((int, int));
int m_mballoc __P((int, int));
struct mbuf *m_mballoc_wait __P((int, int));
struct mbuf *m_mballoc_wait __P((void));
struct mbuf *m_prepend __P((struct mbuf *,int,int));
struct mbuf *m_pulldown __P((struct mbuf *, int, int, int *));
void m_print __P((const struct mbuf *m));
struct mbuf *m_pullup __P((struct mbuf *, int));
struct mbuf *m_retry __P((int, int));
struct mbuf *m_retryhdr __P((int, int));
struct mbuf *m_split __P((struct mbuf *,int,int));
struct mbuf *m_aux_add __P((struct mbuf *, int, int));
struct mbuf *m_aux_find __P((struct mbuf *, int, int));