add reference count pointer to mbuf iovec

implement robust version of m_collapse
add support for sf_buf
add fix for m_iovappend
add calls to m_sanity under INVARIANTS
fix m_freem_vec to correctly traverse the mbuf iovec chain
This commit is contained in:
Kip Macy 2007-04-14 20:38:38 +00:00
parent 21c5f3f383
commit 642046797b
2 changed files with 384 additions and 164 deletions

View File

@ -34,9 +34,8 @@
#define mtomv(m) ((struct mbuf_vec *)((m)->m_pktdat))
#define M_IOVEC 0x40000 /* mbuf immediate data area is used for cluster ptrs */
#define MAX_MBUF_IOV 12
#define MBUF_IOV_TYPE_MASK ((1<<3)-1)
#define M_IOVEC 0x100000 /* mbuf immediate data area is used for cluster ptrs */
#define MBUF_IOV_TYPE_MASK ((1<<3)-1)
#define mbuf_vec_set_type(mv, i, type) \
(mv)->mv_vec[(i)].mi_flags = (((mv)->mv_vec[(i)].mi_flags \
& ~MBUF_IOV_TYPE_MASK) | type)
@ -46,40 +45,40 @@
struct mbuf_iovec {
uint32_t mi_flags; /* per-cluster flags */
uint16_t mi_size; /* length of clusters */
uint16_t mi_offset; /* data offsets of clusters */
caddr_t mi_base; /* pointers to clusters */
uint16_t mi_flags; /* per-cluster flags */
uint16_t mi_len; /* length of cluster */
uint32_t mi_offset; /* data offsets into cluster */
uint8_t *mi_base; /* pointers to cluster */
volatile uint32_t *mi_refcnt; /* refcnt for cluster*/
#ifdef __i386__
void *mi_args; /* for sf_buf */
#endif
};
/*
* m_pktdat == 200 bytes on 64-bit arches, need to stay below that
*
* 12*16 + 8 == 200
*/
#define MAX_MBUF_IOV ((MHLEN-8)/sizeof(struct mbuf_iovec))
/*
 * Header plus array of cluster descriptors.  The mtomv() macro overlays
 * this structure on an mbuf's m_pktdat area.
 */
struct mbuf_vec {
uint16_t mv_first; /* first valid cluster */
uint16_t mv_count; /* # of clusters */
uint32_t mv_flags; /* flags for iovec */
/* one descriptor per cluster, at most MAX_MBUF_IOV of them */
struct mbuf_iovec mv_vec[MAX_MBUF_IOV];
};
};
int _m_explode(struct mbuf *);
int _m_collapse(struct mbuf *, int maxbufs, struct mbuf **);
void mb_free_vec(struct mbuf *m);
/*
 * Prepare an mbuf for use as an mbuf iovec.
 *
 * Empties the embedded mbuf_vec (no first entry, no entries in use),
 * resets both the mbuf length and the packet header length to zero and
 * marks the mbuf with M_IOVEC.
 */
static __inline void
m_iovinit(struct mbuf *m)
{
	struct mbuf_vec *mv = mtomv(m);

	mv->mv_first = mv->mv_count = 0;
	/* Lengths start at zero; m_iovappend() adds to them per cluster. */
	m->m_pkthdr.len = m->m_len = 0;
	m->m_flags |= M_IOVEC;
}
static __inline void
m_iovappend(struct mbuf *m, void *cl, int size, int len, int offset)
m_iovappend(struct mbuf *m, uint8_t *cl, int size, int len, int offset)
{
struct mbuf_vec *mv = mtomv(m);
struct mbuf_iovec *iov;
@ -90,12 +89,12 @@ m_iovappend(struct mbuf *m, void *cl, int size, int len, int offset)
panic("invalid flags in %s", __func__);
if (mv->mv_count == 0)
m->m_data = cl;
m->m_data = cl + offset;
iov = &mv->mv_vec[idx];
iov->mi_flags = m_gettype(size);
iov->mi_base = cl;
iov->mi_size = len;
iov->mi_len = len;
iov->mi_offset = offset;
m->m_pkthdr.len += len;
m->m_len += len;
@ -104,7 +103,7 @@ m_iovappend(struct mbuf *m, void *cl, int size, int len, int offset)
static __inline int
m_explode(struct mbuf *m)
{
{
if ((m->m_flags & M_IOVEC) == 0)
return (0);
@ -114,34 +113,78 @@ m_explode(struct mbuf *m)
/*
 * Front end for _m_collapse().
 *
 * A chain made of a single mbuf is handed back untouched through *mnew
 * and 0 is returned; any longer chain is passed on to _m_collapse() and
 * its result is returned.  On sparc64 and sun4v every chain is handed
 * back untouched.
 */
static __inline int
m_collapse(struct mbuf *m, int maxbufs, struct mbuf **mnew)
{
#if (defined(__sparc64__) || defined(__sun4v__))
	/* Never collapse on these platforms. */
	*mnew = m;
	return (0);
#else
	if (m->m_next != NULL)
		return _m_collapse(m, maxbufs, mnew);

	/* Nothing to collapse: the chain is a single mbuf. */
	*mnew = m;
	return (0);
#endif
}
/*
 * Release one mbuf and return its successor in the chain.
 *
 * The release routine is chosen from the mbuf flags: mb_free_vec() for
 * M_IOVEC mbufs, mb_free_ext() for M_EXT mbufs, and a plain return to
 * zone_mbuf otherwise.
 */
static __inline struct mbuf *
m_free_vec(struct mbuf *m)
{
	/* Fetch the successor first; m must not be touched once released. */
	struct mbuf *next = m->m_next;

	if ((m->m_flags & M_IOVEC) != 0) {
		mb_free_vec(m);
		return (next);
	}
	if ((m->m_flags & M_EXT) != 0) {
		mb_free_ext(m);
		return (next);
	}
	uma_zfree(zone_mbuf, m);
	return (next);
}
/*
 * Free every mbuf in a chain, starting at m.
 *
 * Each mbuf is released by m_free_vec(), which also supplies the next
 * mbuf to visit.  A NULL chain is a no-op.
 */
static __inline void
m_freem_vec(struct mbuf *m)
{
	while (m != NULL)
		m = m_free_vec(m);
}
static __inline uma_zone_t
m_getzonefromtype(int type)
{
uma_zone_t zone;
switch (type) {
case EXT_MBUF:
zone = zone_mbuf;
break;
case EXT_CLUSTER:
zone = zone_clust;
break;
#if MJUMPAGESIZE != MCLBYTES
case EXT_JUMBOP:
zone = zone_jumbop;
break;
#endif
case EXT_JUMBO9:
zone = zone_jumbo9;
break;
case EXT_JUMBO16:
zone = zone_jumbo16;
break;
#ifndef PACKET_ZONE_DISABLED
case EXT_PACKET:
zone = zone_pack;
break;
#endif
default:
panic("%s: invalid cluster type %d", __func__, type);
}
return (zone);
}
#if (!defined(__sparc64__) && !defined(__sun4v__))
int
bus_dmamap_load_mvec_sg(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m0,
bus_dma_segment_t *segs, int *nsegs, int flags);
#else
#define bus_dmamap_load_mvec_sg bus_dmamap_load_mbuf_sg
#endif
#endif

View File

@ -38,129 +38,301 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/ktr.h>
#include <sys/sf_buf.h>
#include <machine/bus.h>
#include <dev/cxgb/sys/mvec.h>
#include "opt_zero.h"
#ifdef ZERO_COPY_SOCKETS
#error "ZERO_COPY_SOCKETS not supported with mvec"
#endif
#ifdef DEBUG
#define DPRINTF printf
#else
#define DPRINTF(...)
#endif
#ifdef INVARIANTS
#define M_SANITY m_sanity
#else
#define M_SANITY(a, b)
#endif
#define MAX_BUFS 36
#define MAX_HVEC 8
extern uint32_t collapse_free;
extern uint32_t mb_free_vec_free;
/*
 * Snapshot of an mbuf's external-storage description, filled in by
 * m_findmbufs() for the mbufs it selects as heads of the collapsed chain.
 */
struct mbuf_ext {
/* the mbuf this entry describes */
struct mbuf *me_m;
/* copy of m_ext.ext_buf */
caddr_t me_base;
/* copy of m_ext.ref_cnt */
volatile u_int *me_refcnt;
/* copy of m_ext.ext_type */
int me_flags;
/* m_data - m_ext.ext_buf, i.e. data offset into the buffer */
uint32_t me_offset;
};
int
_m_explode(struct mbuf *m)
{
int i, offset, type;
void *cl;
int i, offset, type, first, len;
uint8_t *cl;
struct mbuf *m0, *head = NULL;
struct mbuf_vec *mv;
#ifdef INVARIANTS
len = m->m_len;
m0 = m->m_next;
while (m0) {
KASSERT((m0->m_flags & M_PKTHDR) == 0,
("pkthdr set on intermediate mbuf - pre"));
len += m0->m_len;
m0 = m0->m_next;
}
if (len != m->m_pkthdr.len)
panic("at start len=%d pktlen=%d", len, m->m_pkthdr.len);
#endif
mv = mtomv(m);
for (i = mv->mv_count + mv->mv_first - 1;
i > mv->mv_first; i--) {
first = mv->mv_first;
for (i = mv->mv_count + first - 1; i > first; i--) {
type = mbuf_vec_get_type(mv, i);
cl = mv->mv_vec[i].mi_base;
if ((m0 = m_get(M_NOWAIT, MT_DATA)) == NULL) {
m_freem(head);
return (ENOMEM);
offset = mv->mv_vec[i].mi_offset;
len = mv->mv_vec[i].mi_len;
if (__predict_false(type == EXT_MBUF)) {
m0 = (struct mbuf *)cl;
KASSERT((m0->m_flags & M_EXT) == 0, ("M_EXT set on mbuf"));
m0->m_len = len;
m0->m_data = cl + offset;
goto skip_cluster;
} else if ((m0 = m_get(M_NOWAIT, MT_DATA)) == NULL) {
/*
* Check for extra memory leaks
*/
m_freem(head);
return (ENOMEM);
}
m0->m_flags = 0;
type = mbuf_vec_get_type(mv, i);
m_cljset(m0, (uint8_t *)cl, type);
m0->m_len = mv->mv_vec[i].mi_size;
offset = mv->mv_vec[i].mi_offset;
m0->m_len = mv->mv_vec[i].mi_len;
if (offset)
m_adj(m, offset);
m_adj(m0, offset);
skip_cluster:
m0->m_next = head;
m->m_len -= m0->m_len;
head = m0;
}
offset = mv->mv_vec[0].mi_offset;
cl = mv->mv_vec[0].mi_base;
type = mbuf_vec_get_type(mv, 0);
m->m_flags &= ~(M_IOVEC);
offset = mv->mv_vec[first].mi_offset;
cl = mv->mv_vec[first].mi_base;
type = mbuf_vec_get_type(mv, first);
m->m_flags &= ~(M_IOVEC);
m_cljset(m, cl, type);
if (offset)
m_adj(m, offset);
m->m_next = head;
head = m;
M_SANITY(m, 0);
#ifdef INVARIANTS
len = head->m_len;
m = m->m_next;
while (m) {
KASSERT((m->m_flags & M_PKTHDR) == 0,
("pkthdr set on intermediate mbuf - post"));
len += m->m_len;
m = m->m_next;
}
if (len != head->m_pkthdr.len)
panic("len=%d pktlen=%d", len, head->m_pkthdr.len);
#endif
return (0);
}
#define MAX_BUFS 36
/*
 * Flatten an mbuf chain into an array of mbuf pointers.
 *
 * At most max mbufs are stored into vec.  *count receives the number
 * stored, except on the EINVAL path where it is left untouched.
 *
 * Returns 0 on success, EFBIG when the chain holds more than max mbufs,
 * or EINVAL when an mbuf with EXT_PACKET external storage is found
 * (only checked when PACKET_ZONE_DISABLED is not defined).
 */
static __inline int
m_vectorize(struct mbuf *m, int max, struct mbuf **vec, int *count)
{
	int n = 0;

	while (n < max && m != NULL) {
#ifndef PACKET_ZONE_DISABLED
		if ((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_PACKET))
			return (EINVAL);
#endif
		if (m->m_len == 0)
			DPRINTF("m=%p is len=0\n", m);
		M_SANITY(m, 0);
		vec[n++] = m;
		m = m->m_next;
	}
	*count = n;

	/* Anything left over means the chain did not fit in vec. */
	return (m != NULL ? EFBIG : 0);
}
/*
 * Pick the mbufs that will head the collapsed chain.
 *
 * ivec/maxbufs describe the input mbufs; ovec has room for osize entries
 * and *ocount receives the number filled in.  Input mbufs with M_EXT set
 * are used first, with their external-storage description saved in the
 * entry.  Any shortfall is made up with freshly allocated mbufs (a
 * packet-header mbuf when no input mbuf qualified).  When the first
 * chosen mbuf is not ivec[0], the packet header is copied over to it.
 *
 * Returns 0 on success or ENOMEM when an allocation fails, in which case
 * the entries without M_EXT are returned to zone_mbuf.
 */
static __inline int
m_findmbufs(struct mbuf **ivec, int maxbufs, struct mbuf_ext *ovec, int osize, int *ocount)
{
	struct mbuf_ext *slot;
	struct mbuf *mb;
	int idx, found, wanted;

	/* One head per MAX_MBUF_IOV input mbufs, capped by the size of ovec. */
	wanted = min(((maxbufs - 1)/MAX_MBUF_IOV) + 1, osize);
	ovec[0].me_m = NULL;

	found = 0;
	for (idx = 0; idx < maxbufs && found < wanted; idx++) {
		mb = ivec[idx];
		if ((mb->m_flags & M_EXT) == 0)
			continue;
		slot = &ovec[found++];
		slot->me_m = mb;
		slot->me_base = mb->m_ext.ext_buf;
		slot->me_refcnt = mb->m_ext.ref_cnt;
		slot->me_offset = (mb->m_data - mb->m_ext.ext_buf);
		slot->me_flags = mb->m_ext.ext_type;
	}

	if (found == 0) {
		mb = m_gethdr(M_NOWAIT, MT_DATA);
		if (mb == NULL)
			goto m_getfail;
		ovec[found].me_m = mb;
		found = 1;
	}
	for (; found < wanted; found++) {
		mb = m_get(M_NOWAIT, MT_DATA);
		if (mb == NULL)
			goto m_getfail;
		ovec[found].me_m = mb;
	}

	/*
	 * Copy over packet header to new head of chain
	 */
	if (ovec[0].me_m != ivec[0]) {
		mb = ovec[0].me_m;
		mb->m_flags |= M_PKTHDR;
		memcpy(&mb->m_pkthdr, &ivec[0]->m_pkthdr, sizeof(struct pkthdr));
		SLIST_INIT(&ivec[0]->m_pkthdr.tags);
	}
	*ocount = found;
	return (0);

m_getfail:
	/* Only entries without M_EXT are freed here. */
	for (idx = 0; idx < found; idx++) {
		mb = ovec[idx].me_m;
		if ((mb->m_flags & M_EXT) == 0)
			uma_zfree(zone_mbuf, mb);
	}
	return (ENOMEM);
}
static __inline void
m_setiovec(struct mbuf_iovec *mi, struct mbuf *m, struct mbuf_ext *extvec, int *me_index,
int max_me_index)
{
int idx = *me_index;
mi->mi_len = m->m_len;
if (idx < max_me_index && extvec[idx].me_m == m) {
struct mbuf_ext *me = &extvec[idx];
(*me_index)++;
mi->mi_base = me->me_base;
mi->mi_refcnt = me->me_refcnt;
mi->mi_offset = me->me_offset;
mi->mi_flags = me->me_flags;
} else if (m->m_flags & M_EXT) {
mi->mi_base = m->m_ext.ext_buf;
mi->mi_refcnt = m->m_ext.ref_cnt;
mi->mi_offset =
(m->m_data - m->m_ext.ext_buf);
mi->mi_flags = m->m_ext.ext_type;
} else {
KASSERT(m->m_len < 256, ("mbuf too large len=%d",
m->m_len));
mi->mi_base = (uint8_t *)m;
mi->mi_refcnt = NULL;
mi->mi_offset =
(m->m_data - (caddr_t)m);
mi->mi_flags = EXT_MBUF;
}
DPRINTF("type=%d len=%d refcnt=%p cl=%p offset=0x%x\n",
mi->mi_flags, mi->mi_len, mi->mi_refcnt, mi->mi_base,
mi->mi_offset);
}
int
_m_collapse(struct mbuf *m, int maxbufs, struct mbuf **mnew)
{
struct mbuf *m0, *lvec[MAX_BUFS];
struct mbuf **mnext, **vec = &lvec[0];
struct mbuf *m0, *lmvec[MAX_BUFS];
struct mbuf **mnext;
struct mbuf **vec = &lmvec[0];
struct mbuf *mhead = NULL;
struct mbuf_vec *mv;
int i, j, max;
int err, i, j, max, len, nhbufs;
struct mbuf_ext dvec[MAX_HVEC];
int hidx = 0, dvecidx;
if (maxbufs > MAX_BUFS)
M_SANITY(m, 0);
if (maxbufs > MAX_BUFS) {
if ((vec = malloc(maxbufs * sizeof(struct mbuf *),
M_DEVBUF, M_NOWAIT)) == NULL)
return (ENOMEM);
m0 = m;
for (i = 0; i < maxbufs; i++) {
if (m0 == NULL)
goto batch;
vec[i] = m0;
m0 = m0->m_next;
}
if (i == maxbufs)
return (EFBIG);
batch:
max = i;
i = 0;
m0 = NULL;
mnext = NULL;
while (i < max) {
if ((vec[i]->m_flags & M_EXT) == 0) {
m0 = m_get(M_NOWAIT, MT_DATA);
} else {
m0 = vec[i];
m0->m_flags = (vec[i]->m_flags & ~M_EXT);
}
if ((err = m_vectorize(m, maxbufs, vec, &max)) != 0)
return (err);
if ((err = m_findmbufs(vec, max, dvec, MAX_HVEC, &nhbufs)) != 0)
return (err);
KASSERT(max > 0, ("invalid mbuf count"));
KASSERT(nhbufs > 0, ("invalid header mbuf count"));
mhead = m0 = dvec[0].me_m;
DPRINTF("nbufs=%d nhbufs=%d\n", max, nhbufs);
for (hidx = dvecidx = i = 0, mnext = NULL; i < max; hidx++) {
m0 = dvec[hidx].me_m;
m0->m_flags &= ~M_EXT;
m0->m_flags |= M_IOVEC;
if (m0 == NULL)
goto m_getfail;
if (i == 0)
mhead = m0;
if (mnext)
if (mnext)
*mnext = m0;
mv = mtomv(m0);
mv->mv_count = mv->mv_first = 0;
for (j = 0; j < MAX_MBUF_IOV; j++, i++) {
if (vec[i]->m_flags & M_EXT) {
mv->mv_vec[j].mi_base = vec[i]->m_ext.ext_buf;
mv->mv_vec[j].mi_offset =
(vec[i]->m_ext.ext_buf - vec[i]->m_data);
mv->mv_vec[j].mi_size = vec[i]->m_ext.ext_size;
mv->mv_vec[j].mi_flags = vec[i]->m_ext.ext_type;
} else {
mv->mv_vec[j].mi_base = (caddr_t)vec[i];
mv->mv_vec[j].mi_offset =
((caddr_t)vec[i] - vec[i]->m_data);
mv->mv_vec[j].mi_size = MSIZE;
mv->mv_vec[j].mi_flags = EXT_MBUF;
}
}
mnext = &m0->m_next;
}
len = mv->mv_first = 0;
for (j = 0; j < MAX_MBUF_IOV && i < max; j++, i++) {
struct mbuf_iovec *mi = &mv->mv_vec[j];
mhead->m_flags |= (m0->m_flags & M_PKTHDR);
m_setiovec(mi, vec[i], dvec, &dvecidx, nhbufs);
len += mi->mi_len;
}
m0->m_data = mv->mv_vec[0].mi_base + mv->mv_vec[0].mi_offset;
mv->mv_count = j;
m0->m_len = len;
mnext = &m0->m_next;
DPRINTF("count=%d len=%d\n", j, len);
}
/*
* Terminate chain
*/
m0->m_next = NULL;
/*
* Free all mbufs not used by the mbuf iovec chain
*/
for (i = 0; i < max; i++)
if (vec[i]->m_flags & M_EXT) {
vec[i]->m_flags &= ~M_EXT;
collapse_free++;
uma_zfree(zone_mbuf, vec[i]);
}
*mnew = mhead;
return (0);
m_getfail:
m0 = mhead;
while (mhead) {
mhead = m0->m_next;
uma_zfree(zone_mbuf, m0);
}
return (ENOMEM);
}
void
@ -170,70 +342,66 @@ mb_free_vec(struct mbuf *m)
int i;
KASSERT((m->m_flags & (M_EXT|M_IOVEC)) == M_IOVEC,
("%s: M_IOVEC not set", __func__));
("%s: M_EXT set", __func__));
mv = mtomv(m);
KASSERT(mv->mv_count <= MAX_MBUF_IOV,
("%s: mi_count too large %d", __func__, mv->mv_count));
DPRINTF("count=%d len=%d\n", mv->mv_count, m->m_len);
for (i = mv->mv_first; i < mv->mv_count; i++) {
uma_zone_t zone = NULL;
int *refcnt;
volatile int *refcnt = mv->mv_vec[i].mi_refcnt;
int type = mbuf_vec_get_type(mv, i);
void *cl = mv->mv_vec[i].mi_base;
int size = mv->mv_vec[i].mi_size;
zone = m_getzone(size);
refcnt = uma_find_refcnt(zone, cl);
if (*refcnt != 1 && atomic_fetchadd_int(refcnt, -1) != 1)
if (refcnt && *refcnt != 1 && atomic_fetchadd_int(refcnt, -1) != 1)
continue;
DPRINTF("freeing idx=%d refcnt=%p type=%d cl=%p\n", i, refcnt, type, cl);
switch (type) {
case EXT_PACKET: /* The packet zone is special. */
if (*refcnt == 0)
*refcnt = 1;
uma_zfree(zone_pack, m);
return; /* Job done. */
case EXT_MBUF:
mb_free_vec_free++;
case EXT_CLUSTER:
case EXT_JUMBOP:
case EXT_JUMBO9:
case EXT_JUMBO16:
zone = m_getzonefromtype(type);
uma_zfree(zone, cl);
continue;
case EXT_SFBUF:
*refcnt = 0;
uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *,
refcnt));
/* FALLTHROUGH */
case EXT_EXTREF:
#ifdef notyet
KASSERT(m->m_ext.ext_free != NULL,
("%s: ext_free not set", __func__));
(*(m->m_ext.ext_free))(m->m_ext.ext_buf,
m->m_ext.ext_args);
#endif
#ifdef __i386__
sf_buf_mext(cl, mv->mv_vec[i].mi_args);
#else
/*
* XXX
* Every architecture other than i386 uses a vm_page
* for an sf_buf (well ... sparc64 does but shouldn't)
*/
panic("unsupported mbuf_vec type: %d\n", type);
break;
sf_buf_mext(cl, PHYS_TO_VM_PAGE(vtophys(cl)));
#endif
continue;
default:
KASSERT(m->m_ext.ext_type == 0,
("%s: unknown ext_type", __func__));
break;
}
}
/*
* Free this mbuf back to the mbuf zone with all m_ext
* Free this mbuf back to the mbuf zone with all iovec
* information purged.
*/
m->m_flags &= ~M_IOVEC;
mb_free_vec_free++;
uma_zfree(zone_mbuf, m);
}
#if (!defined(__sparc64__) && !defined(__sun4v__))
struct mvec_sg_cb_arg {
bus_dma_segment_t *segs;
int error;
bus_dma_segment_t seg;
int index;
int nseg;
};
@ -263,9 +431,9 @@ mvec_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
struct mvec_sg_cb_arg *cb_arg = arg;
cb_arg->error = error;
cb_arg->seg = segs[0];
cb_arg->segs[cb_arg->index] = segs[0];
cb_arg->nseg = nseg;
KASSERT(nseg == 1, ("nseg=%d", nseg));
}
int
@ -273,19 +441,21 @@ bus_dmamap_load_mvec_sg(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m0,
bus_dma_segment_t *segs, int *nsegs, int flags)
{
int error;
struct mbuf_vec *mv;
struct mvec_sg_cb_arg cb_arg;
M_ASSERTPKTHDR(m0);
M_ASSERTPKTHDR(m0);
if ((m0->m_flags & M_IOVEC) == 0)
return (bus_dmamap_load_mbuf_sg(dmat, map, m0, segs, nsegs, flags));
flags |= BUS_DMA_NOWAIT;
*nsegs = 0;
error = 0;
if (m0->m_pkthdr.len <=
dmat->maxsize) {
if (m0->m_pkthdr.len <= dmat->maxsize) {
struct mbuf *m;
cb_arg.segs = segs;
for (m = m0; m != NULL && error == 0; m = m->m_next) {
struct mbuf_vec *mv;
int count, first, i;
if (!(m->m_len > 0))
continue;
@ -293,15 +463,25 @@ bus_dmamap_load_mvec_sg(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m0,
mv = mtomv(m);
count = mv->mv_count;
first = mv->mv_first;
KASSERT(count <= MAX_MBUF_IOV, ("count=%d too large", count));
for (i = first; i < count; i++) {
void *data = mv->mv_vec[i].mi_base;
int size = mv->mv_vec[i].mi_size;
cb_arg.seg = *segs;
void *data = mv->mv_vec[i].mi_base + mv->mv_vec[i].mi_offset;
int size = mv->mv_vec[i].mi_len;
if (size == 0)
continue;
DPRINTF("mapping data=%p size=%d\n", data, size);
cb_arg.index = *nsegs;
error = bus_dmamap_load(dmat, map,
data, size, mvec_cb, &cb_arg, flags);
segs++;
*nsegs++;
(*nsegs)++;
if (*nsegs >= dmat->nsegments) {
DPRINTF("*nsegs=%d dmat->nsegments=%d index=%d\n",
*nsegs, dmat->nsegments, cb_arg.index);
error = EFBIG;
goto err_out;
}
if (error || cb_arg.error)
goto err_out;
}
@ -309,9 +489,6 @@ bus_dmamap_load_mvec_sg(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m0,
} else {
error = EINVAL;
}
/* XXX FIXME: Having to increment nsegs is really annoying */
++*nsegs;
CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d",
__func__, dmat, dmat->flags, error, *nsegs);
return (error);
@ -322,4 +499,4 @@ err_out:
return (error);
}
#endif /* !__sparc64__ */
#endif /* !__sparc64__ && !__sun4v__ */