On the first day of Christmas bde gave to me:
A [hopefully] conforming style(9) revamp of mb_alloc and related code.
(This was possible due to bde's remarkable patience.)

Submitted by:	(in large part) bde
Reviewed by:	(the other part) bde
This commit is contained in:
parent 257bc17600
commit 56b602dd6a
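For readers who do not have style(9) handy, the fragment below is an illustrative sketch (the identifiers are invented for this example and do not appear in the commit) of the conventions the revamp applies throughout the diff that follows: a dash after the comment opener marks a block whose layout should be preserved, declarations separate type and name with a tab and align the names, sentences inside comments end with a period, file-local symbols are declared static, and pointer tests compare against NULL explicitly.

#include <stddef.h>

/*-
 * Illustrative fragment only; nothing here is taken from the commit.
 */
struct ex_entry {
	struct ex_entry	*ex_next;		/* Next entry on the list. */
	void		(*ex_drain)(void);	/* Optional drain hook. */
};

static struct ex_entry	*ex_list;		/* File-local, hence static. */

static void
ex_drain_all(void)
{
	struct ex_entry *ep;

	/* Compare pointers against NULL explicitly, as the revamp does. */
	for (ep = ex_list; ep != NULL; ep = ep->ex_next)
		if (ep->ex_drain != NULL)
			(*ep->ex_drain)();
}

The diff itself follows.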
@@ -1,6 +1,6 @@
/*
/*-
* Copyright (c) 2001
* Bosko Milekic <bmilekic@FreeBSD.org>. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -41,6 +41,7 @@
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/protosw.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
@@ -50,18 +51,18 @@
* explicitly define MBALLOC_NCPU to be exactly the number of CPUs on your
* system during compilation, and thus prevent kernel structure bloat.
*
* SMP and non-SMP kernels clearly have a different number of possible cpus,
* SMP and non-SMP kernels clearly have a different number of possible CPUs,
* but because we cannot assume a dense array of CPUs, we always allocate
* and traverse PCPU containers up to NCPU amount and merely check for
* CPU availability.
*/
#ifdef MBALLOC_NCPU
#define NCPU MBALLOC_NCPU
#else
#define NCPU MAXCPU
#endif

/*
/*-
* The mbuf allocator is heavily based on Alfred Perlstein's
* (alfred@FreeBSD.org) "memcache" allocator which is itself based
* on concepts from several per-CPU memory allocators. The difference
@@ -84,43 +85,42 @@
* are kept together, thus trying to put the TLB cache to good use.
*
* The buckets are kept on singly-linked lists called "containers." A container
* is protected by a mutex lock in order to ensure consistency. The mutex lock
* itself is allocated seperately and attached to the container at boot time,
* thus allowing for certain containers to share the same mutex lock. Per-CPU
* containers for mbufs and mbuf clusters all share the same per-CPU
* lock whereas the "general system" containers (i.e. the "main lists") for
* lock whereas the "general system" containers (i.e., the "main lists") for
* these objects share one global lock.
*
*/
struct mb_bucket {
SLIST_ENTRY(mb_bucket) mb_blist;
int mb_owner;
int mb_numfree;
void *mb_free[0];
};

struct mb_container {
SLIST_HEAD(mc_buckethd, mb_bucket) mc_bhead;
struct mtx *mc_lock;
int mc_numowner;
u_int mc_starved;
long *mc_types;
u_long *mc_objcount;
u_long *mc_numpgs;
};

struct mb_gen_list {
struct mb_container mb_cont;
struct cv mgl_mstarved;
};

struct mb_pcpu_list {
struct mb_container mb_cont;
};

/*
* Boot-time configurable object counts that will determine the maximum
* number of permitted objects in the mbuf and mcluster cases. In the
* ext counter (nmbcnt) case, it's just an indicator serving to scale
* kmem_map size properly - in other words, we may be allowed to allocate
* more than nmbcnt counters, whereas we will never be allowed to allocate
@@ -131,13 +131,13 @@ struct mb_pcpu_list {
#ifndef NMBCLUSTERS
#define NMBCLUSTERS (1024 + maxusers * 64)
#endif
#ifndef NMBUFS
#define NMBUFS (nmbclusters * 2)
#endif
#ifndef NSFBUFS
#define NSFBUFS (512 + maxusers * 16)
#endif
#ifndef NMBCNTS
#define NMBCNTS (nmbclusters + nsfbufs)
#endif
int nmbufs;
@@ -168,57 +168,56 @@ tunable_mbinit(void *dummy)
nmbufs = nmbclusters * 2;
if (nmbcnt < nmbclusters + nsfbufs)
nmbcnt = nmbclusters + nsfbufs;

return;
}
SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL);

/*
* The freelist structures and mutex locks. The number statically declared
* here depends on the number of CPUs.
*
* We setup in such a way that all the objects (mbufs, clusters)
* share the same mutex lock. It has been established that we do not benefit
* We set up in such a way that all the objects (mbufs, clusters)
* share the same mutex lock. It has been established that we do not benefit
* from different locks for different objects, so we use the same lock,
* regardless of object type.
*/
struct mb_lstmngr {
struct mb_gen_list *ml_genlist;
struct mb_pcpu_list *ml_cntlst[NCPU];
struct mb_bucket **ml_btable;
vm_map_t ml_map;
vm_offset_t ml_mapbase;
vm_offset_t ml_maptop;
int ml_mapfull;
u_int ml_objsize;
u_int *ml_wmhigh;
};
struct mb_lstmngr mb_list_mbuf, mb_list_clust;
struct mtx mbuf_gen, mbuf_pcpu[NCPU];
static struct mb_lstmngr mb_list_mbuf, mb_list_clust;
static struct mtx mbuf_gen, mbuf_pcpu[NCPU];

/*
* Local macros for internal allocator structure manipulations.
*/
#ifdef SMP
#define MB_GET_PCPU_LIST(mb_lst) (mb_lst)->ml_cntlst[PCPU_GET(cpuid)]
#else
#define MB_GET_PCPU_LIST(mb_lst) (mb_lst)->ml_cntlst[0]
#endif

#define MB_GET_PCPU_LIST_NUM(mb_lst, num) (mb_lst)->ml_cntlst[(num)]
#define MB_GET_GEN_LIST(mb_lst) (mb_lst)->ml_genlist

#define MB_GET_GEN_LIST(mb_lst) (mb_lst)->ml_genlist
#define MB_LOCK_CONT(mb_cnt) mtx_lock((mb_cnt)->mb_cont.mc_lock)

#define MB_LOCK_CONT(mb_cnt) mtx_lock((mb_cnt)->mb_cont.mc_lock)
#define MB_UNLOCK_CONT(mb_cnt) mtx_unlock((mb_cnt)->mb_cont.mc_lock)

#define MB_UNLOCK_CONT(mb_cnt) mtx_unlock((mb_cnt)->mb_cont.mc_lock)
#define MB_GET_PCPU_LIST_NUM(mb_lst, num) \
(mb_lst)->ml_cntlst[(num)]

#define MB_BUCKET_INDX(mb_obj, mb_lst) \
(int)(((caddr_t)(mb_obj) - (caddr_t)(mb_lst)->ml_mapbase) / PAGE_SIZE)

#define MB_GET_OBJECT(mb_objp, mb_bckt, mb_lst) \
{ \
struct mc_buckethd *_mchd = &((mb_lst)->mb_cont.mc_bhead); \
\
(mb_bckt)->mb_numfree--; \
(mb_objp) = (mb_bckt)->mb_free[((mb_bckt)->mb_numfree)]; \
@@ -244,24 +243,29 @@ struct mtx mbuf_gen, mbuf_pcpu[NCPU];
(*((mb_cnt)->mb_cont.mc_types + (mb_type))) -= (mb_num)

/*
* Ownership of buckets/containers is represented by integers. The PCPU
* lists range from 0 to NCPU-1. We need a free numerical id for the general
* list (we use NCPU). We also need a non-conflicting free bit to indicate
* that the bucket is free and removed from a container, while not losing
* the bucket's originating container id. We use the highest bit
* for the free marker.
*/
#define MB_GENLIST_OWNER (NCPU)
#define MB_BUCKET_FREE (1 << (sizeof(int) * 8 - 1))

/* Statistics structures for allocator (per-CPU and general). */
static struct mbpstat mb_statpcpu[NCPU + 1];
struct mbstat mbstat;

/* Sleep time for wait code (in ticks). */
static int mbuf_wait = 64;

static u_int mbuf_limit = 512; /* Upper limit on # of mbufs per CPU. */
static u_int clust_limit = 128; /* Upper limit on # of clusters per CPU. */

/*
* sysctl(8) exported objects
* Objects exported by sysctl(8).
*/
struct mbstat mbstat; /* General stats + infos. */
struct mbpstat mb_statpcpu[NCPU+1]; /* PCPU + Gen. container alloc stats */
int mbuf_wait = 64; /* Sleep time for wait code (ticks) */
u_int mbuf_limit = 512; /* Upper lim. on # of mbufs per CPU */
u_int clust_limit = 128; /* Upper lim. on # of clusts per CPU */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbclusters, CTLFLAG_RD, &nmbclusters, 0,
"Maximum number of mbuf clusters available");
@@ -285,16 +289,14 @@ SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mb_statpcpu, CTLFLAG_RD, mb_statpcpu,
/*
* Prototypes of local allocator routines.
*/
static __inline void *mb_alloc(struct mb_lstmngr *, int, short);
void *mb_alloc_wait(struct mb_lstmngr *, short);
static __inline void mb_free(struct mb_lstmngr *, void *, short);
static void mbuf_init(void *);
struct mb_bucket *mb_pop_cont(struct mb_lstmngr *, int,
static void *mb_alloc_wait(struct mb_lstmngr *, short);
static struct mb_bucket *mb_pop_cont(struct mb_lstmngr *, int,
struct mb_pcpu_list *);
void mb_reclaim(void);
static void mb_reclaim(void);
static void mbuf_init(void *);

/*
* Initial allocation numbers. Each parameter represents the number of buckets
* of each object that will be placed initially in each PCPU container for
* said object.
*/
@@ -309,7 +311,7 @@ void mb_reclaim(void);
* allocations, due to fear of one type of allocation "stealing" address
* space initially reserved for another.
*
* Setup both the general containers and all the PCPU containers. Populate
* Set up both the general containers and all the PCPU containers. Populate
* the PCPU containers with initial numbers.
*/
MALLOC_DEFINE(M_MBUF, "mbufmgr", "mbuf subsystem management structures");
@@ -317,12 +319,12 @@ SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL)
void
mbuf_init(void *dummy)
{
struct mb_pcpu_list *pcpu_cnt;
vm_size_t mb_map_size;
int i, j;

/*
* Setup all the submaps, for each type of object that we deal
* Set up all the submaps, for each type of object that we deal
* with in this allocator.
*/
mb_map_size = (vm_size_t)(nmbufs * MSIZE);
@@ -350,7 +352,7 @@ mbuf_init(void *dummy)
mb_list_clust.ml_objsize = MCLBYTES;
mb_list_clust.ml_wmhigh = &clust_limit;

/* XXX XXX XXX: mbuf_map->system_map = clust_map->system_map = 1 */
/* XXX XXX XXX: mbuf_map->system_map = clust_map->system_map = 1. */

/*
* Allocate required general (global) containers for each object type.
@@ -374,7 +376,7 @@ mbuf_init(void *dummy)
mb_list_clust.ml_genlist->mb_cont.mc_lock = &mbuf_gen;

/*
* Setup the general containers for each object.
* Set up the general containers for each object.
*/
mb_list_mbuf.ml_genlist->mb_cont.mc_numowner =
mb_list_clust.ml_genlist->mb_cont.mc_numowner = MB_GENLIST_OWNER;
@@ -395,7 +397,7 @@ mbuf_init(void *dummy)
SLIST_INIT(&(mb_list_clust.ml_genlist->mb_cont.mc_bhead));

/*
* Initialize general mbuf statistics
* Initialize general mbuf statistics.
*/
mbstat.m_msize = MSIZE;
mbstat.m_mclbytes = MCLBYTES;
@@ -472,26 +474,26 @@ bad:

/*
* Populate a given mbuf PCPU container with a bucket full of fresh new
* buffers. Return a pointer to the new bucket (already in the container if
* successful), or return NULL on failure.
*
* LOCKING NOTES:
* PCPU container lock must be held when this is called.
* The lock is dropped here so that we can cleanly call the underlying VM
* code. If we fail, we return with no locks held. If we succeed (i.e. return
* code. If we fail, we return with no locks held. If we succeed (i.e., return
* non-NULL), we return with the PCPU lock held, ready for allocation from
* the returned bucket.
*/
struct mb_bucket *
static struct mb_bucket *
mb_pop_cont(struct mb_lstmngr *mb_list, int how, struct mb_pcpu_list *cnt_lst)
{
struct mb_bucket *bucket;
caddr_t p;
int i;

MB_UNLOCK_CONT(cnt_lst);
/*
* If our object's (finite) map is starved now (i.e. no more address
* If our object's (finite) map is starved now (i.e., no more address
* space), bail out now.
*/
if (mb_list->ml_mapfull)
@@ -531,8 +533,8 @@ mb_pop_cont(struct mb_lstmngr *mb_list, int how, struct mb_pcpu_list *cnt_lst)

/*
* Allocate an mbuf-subsystem type object.
* The general case is very easy. Complications only arise if our PCPU
* container is empty. Things get worse if the PCPU container is empty,
* the general container is empty, and we've run out of address space
* in our map; then we try to block if we're willing to (M_TRYWAIT).
*/
@@ -540,9 +542,10 @@ static __inline
void *
mb_alloc(struct mb_lstmngr *mb_list, int how, short type)
{
struct mb_pcpu_list *cnt_lst;
struct mb_bucket *bucket;
void *m;
static int last_report;
struct mb_pcpu_list *cnt_lst;
struct mb_bucket *bucket;
void *m;

m = NULL;
cnt_lst = MB_GET_PCPU_LIST(mb_list);
@@ -559,7 +562,7 @@ mb_alloc(struct mb_lstmngr *mb_list, int how, short type)
MB_MBTYPES_INC(cnt_lst, type, 1);
MB_UNLOCK_CONT(cnt_lst);
} else {
struct mb_gen_list *gen_list;

/*
* This is the less-common more difficult case. We must
@@ -615,25 +618,22 @@ mb_alloc(struct mb_lstmngr *mb_list, int how, short type)
MB_UNLOCK_CONT(cnt_lst);
} else {
if (how == M_TRYWAIT) {
/*
* Absolute worst-case scenario. We block if
* we're willing to, but only after trying to
* steal from other lists.
*/
/*
* Absolute worst-case scenario.
* We block if we're willing to, but
* only after trying to steal from
* other lists.
*/
m = mb_alloc_wait(mb_list, type);
} else {
/*
* no way to indent this code decently
* with 8-space tabs.
*/
static int last_report;
/* XXX: No consistency. */
mbstat.m_drops++;

if (ticks < last_report ||
(ticks - last_report) >= hz) {
last_report = ticks;
printf(
"mb_alloc for type %d failed, consider increase mbuf value.\n", type);
"mb_alloc for mbuf type %d failed.\n", type);
}

}
@@ -646,19 +646,19 @@ mb_alloc(struct mb_lstmngr *mb_list, int how, short type)

/*
* This is the worst-case scenario called only if we're allocating with
* M_TRYWAIT. We first drain all the protocols, then try to find an mbuf
* by looking in every PCPU container. If we're still unsuccesful, we
* try the general container one last time and possibly block on our
* starved cv.
*/
void *
static void *
mb_alloc_wait(struct mb_lstmngr *mb_list, short type)
{
struct mb_pcpu_list *cnt_lst;
struct mb_gen_list *gen_list;
struct mb_bucket *bucket;
void *m;
int i, cv_ret;

/*
* Try to reclaim mbuf-related objects (mbufs, clusters).
@@ -727,7 +727,7 @@ mb_alloc_wait(struct mb_lstmngr *mb_list, short type)
return (m);
}

/*
/*-
* Free an object to its rightful container.
* In the very general case, this operation is really very easy.
* Complications arise primarily if:
@@ -744,10 +744,10 @@ static __inline
void
mb_free(struct mb_lstmngr *mb_list, void *m, short type)
{
struct mb_pcpu_list *cnt_lst;
struct mb_gen_list *gen_list;
struct mb_bucket *bucket;
u_int owner;

bucket = mb_list->ml_btable[MB_BUCKET_INDX(m, mb_list)];

@@ -891,24 +891,22 @@ retry_lock:
MB_UNLOCK_CONT(cnt_lst);
break;
}

return;
}

/*
* Drain protocols in hopes to free up some resources.
*
* LOCKING NOTES:
* No locks should be held when this is called. The drain routines have to
* presently acquire some locks which raises the possibility of lock order
* violation if we're holding any mutex if that mutex is acquired in reverse
* order relative to one of the locks in the drain routines.
*/
void
static void
mb_reclaim(void)
{
struct domain *dp;
struct protosw *pr;

/*
* XXX: Argh, we almost always trip here with witness turned on now-a-days
@@ -922,18 +920,17 @@ mb_reclaim(void)

mbstat.m_drain++; /* XXX: No consistency. */

for (dp = domains; dp; dp = dp->dom_next)
for (dp = domains; dp != NULL; dp = dp->dom_next)
for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
if (pr->pr_drain)
if (pr->pr_drain != NULL)
(*pr->pr_drain)();

}

/*
* Local mbuf & cluster alloc macros and routines.
* Local macro and function names begin with an underscore ("_").
*/
void _mclfree(struct mbuf *);
static void _mclfree(struct mbuf *);

#define _m_get(m, how, type) do { \
(m) = (struct mbuf *)mb_alloc(&mb_list_mbuf, (how), (type)); \
@@ -960,7 +957,7 @@ void _mclfree(struct mbuf *);
} \
} while (0)

/* XXX: Check for M_PKTHDR && m_pkthdr.aux is bogus... please fix (see KAME) */
/* XXX: Check for M_PKTHDR && m_pkthdr.aux is bogus... please fix (see KAME). */
#define _m_free(m, n) do { \
(n) = (m)->m_next; \
if ((m)->m_flags & M_EXT) \
@@ -991,20 +988,19 @@ _mext_free(struct mbuf *mb)
mb_free(&mb_list_clust, (caddr_t)mb->m_ext.ext_buf, MT_NOTMBUF);
else
(*(mb->m_ext.ext_free))(mb->m_ext.ext_buf, mb->m_ext.ext_args);

_mext_dealloc_ref(mb);
return;
}

/* We only include this here to avoid making m_clget() excessively large
* due to too much inlined code. */
void
/*
* We only include this here to avoid making m_clget() excessively large
* due to too much inlined code.
*/
static void
_mclfree(struct mbuf *mb)
{

mb_free(&mb_list_clust, (caddr_t)mb->m_ext.ext_buf, MT_NOTMBUF);
mb->m_ext.ext_buf = NULL;
return;
}

/*
@@ -1013,7 +1009,7 @@ _mclfree(struct mbuf *mb)
struct mbuf *
m_get(int how, int type)
{
struct mbuf *mb;

_m_get(mb, how, type);
return (mb);
@@ -1022,7 +1018,7 @@ m_get(int how, int type)
struct mbuf *
m_gethdr(int how, int type)
{
struct mbuf *mb;

_m_gethdr(mb, how, type);
return (mb);
@@ -1031,33 +1027,29 @@ m_gethdr(int how, int type)
struct mbuf *
m_get_clrd(int how, int type)
{
struct mbuf *mb;

_m_get(mb, how, type);

if (mb != NULL)
bzero(mtod(mb, caddr_t), MLEN);

return (mb);
}

struct mbuf *
m_gethdr_clrd(int how, int type)
{
struct mbuf *mb;

_m_gethdr(mb, how, type);

if (mb != NULL)
bzero(mtod(mb, caddr_t), MHLEN);

return (mb);
}

struct mbuf *
m_free(struct mbuf *mb)
{
struct mbuf *nb;

_m_free(mb, nb);
return (nb);
@@ -1081,12 +1073,11 @@ m_clget(struct mbuf *mb, int how)
mb->m_ext.ext_type = EXT_CLUSTER;
}
}
return;
}

void
m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
void (*freef)(caddr_t, void *), void *args, short flags, int type)
{

_mext_init_ref(mb);
@@ -1099,7 +1090,6 @@ m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
mb->m_ext.ext_args = args;
mb->m_ext.ext_type = type;
}
return;
}

/*
@@ -1109,7 +1099,7 @@ m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
void
m_chtype(struct mbuf *mb, short new_type)
{
struct mb_gen_list *gen_list;

gen_list = MB_GET_GEN_LIST(&mb_list_mbuf);
MB_LOCK_CONT(gen_list);
@@ -1117,5 +1107,4 @@ m_chtype(struct mbuf *mb, short new_type)
MB_MBTYPES_INC(gen_list, new_type, 1);
MB_UNLOCK_CONT(gen_list);
mb->m_type = new_type;
return;
}
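As an aside for readers following the allocator comments above: the data-structure hierarchy they describe (a list manager holding per-CPU and general containers, each container holding buckets of cached objects) can be sketched roughly as follows. This is a simplified illustration with made-up names, not code from the commit; the real mb_alloc() also takes the container's lock, updates statistics, and can refill a container from the VM map or block when everything is empty.

#include <stddef.h>

/* Simplified model of the bucket/container scheme; names are invented. */
struct sk_bucket {
	struct sk_bucket	*sb_next;	/* Next bucket in the container. */
	int			 sb_numfree;	/* Free slots left in sb_free[]. */
	void			*sb_free[8];	/* Cached objects. */
};

struct sk_container {
	struct sk_bucket	*sc_head;	/* Singly-linked list of buckets. */
	/* The real mb_container also carries a mutex and statistics. */
};

static void *
sk_alloc(struct sk_container *pcpu, struct sk_container *gen)
{
	struct sk_bucket *b;

	/* Fast path: a bucket cached on this CPU's own container. */
	b = pcpu->sc_head;
	if (b != NULL && b->sb_numfree > 0)
		return (b->sb_free[--b->sb_numfree]);

	/* Slow path: fall back to the shared general container. */
	b = gen->sc_head;
	if (b != NULL && b->sb_numfree > 0)
		return (b->sb_free[--b->sb_numfree]);

	/* Both empty; the real allocator would refill, steal, or block. */
	return (NULL);
}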
sys/sys/mbuf.h (145 changes)
@@ -1,4 +1,4 @@
/*
/*-
* Copyright (c) 1982, 1986, 1988, 1993
* The Regents of the University of California. All rights reserved.
*
@@ -42,7 +42,7 @@
* includes overhead. An mbuf may add a single "mbuf cluster" of size
* MCLBYTES (also in machine/param.h), which has no additional overhead
* and is used instead of the internal data area; this is done when
* at least MINCLSIZE of data must be stored. Additionally, it is possible
* to allocate a separate buffer externally and attach it to the mbuf in
* a way similar to that of mbuf clusters.
*/
@@ -52,10 +52,10 @@
#define M_MAXCOMPRESS (MHLEN / 2) /* max amount to copy for compression */

#ifdef _KERNEL
/*
* Macros for type conversion
* mtod(m, t) - convert mbuf pointer to data pointer of correct type
* dtom(x) - convert data pointer within mbuf to mbuf pointer (XXX)
/*-
* Macros for type conversion:
* mtod(m, t) -- Convert mbuf pointer to data pointer of correct type.
* dtom(x) -- Convert data pointer within mbuf to mbuf pointer (XXX).
*/
#define mtod(m, t) ((t)((m)->m_data))
#define dtom(x) ((struct mbuf *)((intptr_t)(x) & ~(MSIZE-1)))
@@ -130,12 +130,12 @@ struct mbuf {
#define m_dat M_dat.M_databuf

/*
* mbuf flags
* mbuf flags.
*/
#define M_EXT 0x0001 /* has associated external storage */
#define M_PKTHDR 0x0002 /* start of record */
#define M_EOR 0x0004 /* end of record */
#define M_RDONLY 0x0008 /* associated data is marked read-only */
#define M_PROTO1 0x0010 /* protocol-specific */
#define M_PROTO2 0x0020 /* protocol-specific */
#define M_PROTO3 0x0040 /* protocol-specific */
@@ -143,7 +143,7 @@ struct mbuf {
#define M_PROTO5 0x0100 /* protocol-specific */

/*
* mbuf pkthdr flags (also stored in m_flags)
* mbuf pkthdr flags (also stored in m_flags).
*/
#define M_BCAST 0x0200 /* send/received as link-level broadcast */
#define M_MCAST 0x0400 /* send/received as link-level multicast */
@@ -152,7 +152,7 @@ struct mbuf {
#define M_LASTFRAG 0x2000 /* packet is last fragment */

/*
* External buffer types: identify ext_buf type
* External buffer types: identify ext_buf type.
*/
#define EXT_CLUSTER 1 /* mbuf cluster */
#define EXT_SFBUF 2 /* sendfile(2)'s sf_bufs */
@@ -160,30 +160,30 @@ struct mbuf {
#define EXT_MOD_TYPE 200 /* custom module's ext_buf type */

/*
* Flags copied when copying m_pkthdr
* Flags copied when copying m_pkthdr.
*/
#define M_COPYFLAGS (M_PKTHDR|M_EOR|M_PROTO1|M_PROTO1|M_PROTO2|M_PROTO3 | \
M_PROTO4|M_PROTO5|M_BCAST|M_MCAST|M_FRAG|M_RDONLY)

/*
* Flags indicating hw checksum support and sw checksum requirements
* Flags indicating hw checksum support and sw checksum requirements.
*/
#define CSUM_IP 0x0001 /* will csum IP */
#define CSUM_TCP 0x0002 /* will csum TCP */
#define CSUM_UDP 0x0004 /* will csum UDP */
#define CSUM_IP_FRAGS 0x0008 /* will csum IP fragments */
#define CSUM_FRAGMENT 0x0010 /* will do IP fragmentation */

#define CSUM_IP_CHECKED 0x0100 /* did csum IP */
#define CSUM_IP_VALID 0x0200 /* ... the csum is valid */
#define CSUM_DATA_VALID 0x0400 /* csum_data field is valid */
#define CSUM_PSEUDO_HDR 0x0800 /* csum_data has pseudo hdr */

#define CSUM_DELAY_DATA (CSUM_TCP | CSUM_UDP)
#define CSUM_DELAY_IP (CSUM_IP) /* XXX add ipv6 here too? */

/*
* mbuf types
* mbuf types.
*/
#define MT_NOTMBUF 0 /* USED INTERNALLY ONLY! Object is not mbuf */
#define MT_DATA 1 /* dynamic (data) allocation */
@@ -223,10 +223,10 @@ struct mbpstat {
/*
* General mbuf allocator statistics structure.
* XXX: Modifications of these are not protected by any mutex locks nor by
* any atomic() manipulations. As a result, we may occasionally lose
* a count or two. Luckily, not all of these fields are modified at all
* and remain static, and those that are manipulated are only manipulated
* in failure situations, which do not occur (hopefully) very often.
*/
struct mbstat {
u_long m_drops; /* times failed to allocate */
@@ -239,8 +239,8 @@ struct mbstat {
u_long m_minclsize; /* min length of data to allocate a cluster */
u_long m_mlen; /* length of data in an mbuf */
u_long m_mhlen; /* length of data in a header mbuf */
short m_numtypes; /* number of mbtypes (gives # elems in mbpstat's
mb_mbtypes[] array. */
/* Number of mbtypes (gives # elems in mbpstat's mb_mbtypes[] array: */
short m_numtypes;
};

/*
@@ -254,51 +254,43 @@ struct mbstat {
#define M_WAIT M_TRYWAIT /* XXX: Deprecated. */

#ifdef _KERNEL
/*
* mbuf external reference count management macros
/*-
* mbuf external reference count management macros.
*
* MEXT_IS_REF(m): true if (m) is not the only mbuf referencing
* the external buffer ext_buf
* the external buffer ext_buf.
*
* MEXT_REM_REF(m): remove reference to m_ext object
* MEXT_REM_REF(m): remove reference to m_ext object.
*
* MEXT_ADD_REF(m): add reference to m_ext object already
* referred to by (m)
* referred to by (m).
*/
#define MEXT_IS_REF(m) (*((m)->m_ext.ref_cnt) > 1)

#define MEXT_REM_REF(m) do { \
KASSERT(*((m)->m_ext.ref_cnt) > 0, ("m_ext refcnt < 0")); \
atomic_subtract_int((m)->m_ext.ref_cnt, 1); \
} while(0)

#define MEXT_ADD_REF(m) atomic_add_int((m)->m_ext.ref_cnt, 1)

/*
* mbuf, cluster, and external object allocation macros
* (for compatibility purposes)
* (for compatibility purposes).
*/
#define MGET(m, how, type) \
(m) = m_get((how), (type))

#define MGETHDR(m, how, type) \
(m) = m_gethdr((how), (type))

#define MCLGET(m, how) \
m_clget((m), (how))

#define MEXTADD(m, buf, size, free, args, flags, type) \
#define m_getclr m_get_clrd
#define MGET(m, how, type) (m) = m_get((how), (type))
#define MGETHDR(m, how, type) (m) = m_gethdr((how), (type))
#define MCLGET(m, how) m_clget((m), (how))
#define MEXTADD(m, buf, size, free, args, flags, type) \
m_extadd((m), (caddr_t)(buf), (size), (free), (args), (flags), (type))

#define MFREE(m, n) do { \
(n) = m_free((m)); \
(m) = NULL; \
} while (0)

#define m_getclr m_get_clrd

/*
* MEXTFREE(m): disassociate (and possibly free) an external object from `m'
* MEXTFREE(m): disassociate (and possibly free) an external object from (m).
*
* If the atomic_cmpset_int() returns 0, then we effectively do nothing
* in terms of "cleaning up" (freeing the ext buf and ref. counter) as
@@ -315,22 +307,21 @@ struct mbstat {
} while (0)

/*
* M_WRITABLE(m)
* Evaluate TRUE if it's safe to write to the mbuf m's data region (this
* can be both the local data payload, or an external buffer area,
* depending on whether M_EXT is set).
*/
#define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && (!((m)->m_flags \
& M_EXT) || !MEXT_IS_REF(m)))

/*
/*-
* Copy mbuf pkthdr from "from" to "to".
* from must have M_PKTHDR set, and to must be empty.
* aux pointer will be moved to `to'.
* "from" must have M_PKTHDR set, and "to" must be empty.
* aux pointer will be moved to "to".
*/
#define M_COPY_PKTHDR(to, from) do { \
struct mbuf *_mfrom = (from); \
struct mbuf *_mto = (to); \
\
_mto->m_data = _mto->m_pktdat; \
_mto->m_flags = _mfrom->m_flags & M_COPYFLAGS; \
@@ -380,10 +371,10 @@ struct mbstat {
* set to NULL.
*/
#define M_PREPEND(m, plen, how) do { \
struct mbuf **_mmp = &(m); \
struct mbuf *_mm = *_mmp; \
int _mplen = (plen); \
int __mhow = (how); \
\
if (M_LEADINGSPACE(_mm) >= _mplen) { \
_mm->m_data -= _mplen; \
@@ -401,10 +392,10 @@ struct mbstat {
*/
#define MCHTYPE(m, t) m_chtype((m), (t))

/* length to m_copy to copy all */
/* Length to m_copy to copy all. */
#define M_COPYALL 1000000000

/* compatibility with 4.3 */
/* Compatibility with 4.3 */
#define m_copy(m, o, l) m_copym((m), (o), (l), M_DONTWAIT)

/*
@@ -413,19 +404,19 @@ struct mbstat {
struct mauxtag {
int af;
int type;
void* p;
void *p;
};

extern int max_datalen; /* MHLEN - max_hdr */
extern int max_hdr; /* largest link + protocol header */
extern int max_linkhdr; /* largest link-level header */
extern int max_protohdr; /* largest protocol header */
extern struct mbpstat mb_statpcpu[]; /* Per-CPU allocation stats. */
extern struct mbstat mbstat; /* General mbuf stats/infos. */
extern int nmbclusters; /* Maximum number of clusters */
extern int nmbcnt; /* Scale kmem_map for counter space */
extern int nmbufs; /* Maximum number of mbufs */
extern int nsfbufs; /* Number of sendfile(2) bufs */

void _mext_free(struct mbuf *);
void m_adj(struct mbuf *, int);