promote fast ipsec's m_clone routine for public use; it is renamed
m_unshare and the caller can now control how mbufs are allocated

Reviewed by:	andre, luigi, mlaier
MFC after:	1 week
This commit is contained in:
Sam Leffler 2006-03-15 21:11:11 +00:00
parent b4c31113a1
commit 47e2996e8b
8 changed files with 180 additions and 154 deletions

View File

@ -24,7 +24,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd November 18, 2005
.Dd March 15, 2006
.Dt MBUF 9
.Os
.\"
@ -132,6 +132,8 @@
.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off"
.Ft struct mbuf *
.Fn m_defrag "struct mbuf *m0" "int how"
.Ft struct mbuf *
.Fn m_unshare "struct mbuf *m0" "int how"
.\"
.Sh DESCRIPTION
An
@ -886,6 +888,26 @@ depending on the caller's preference.
This function is especially useful in network drivers, where
certain long mbuf chains must be shortened before being added
to TX descriptor lists.
.It Fn m_unshare m0 how
Create a version of the specified mbuf chain whose
contents can be safely modified without affecting other users.
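If allocation fails and this operation cannot be completed,
.Dv NULL
will be returned and the entire original mbuf chain is freed.
On success, any mbufs of the original chain that had to be copied
or coalesced are freed, which decrements the reference count of any
shared mbuf clusters they used; the caller must use only the
returned chain from then on.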
.Fa how
should be either
.Dv M_TRYWAIT
or
.Dv M_DONTWAIT ,
depending on the caller's preference.
As a side-effect of this process the returned
mbuf chain may be compacted.
.Pp
This function is especially useful in the transmit path of
network code, when data must be encrypted or otherwise
altered prior to transmission.
.El
.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
This section currently applies to TCP/IP only.

View File

@ -1679,3 +1679,156 @@ m_align(struct mbuf *m, int len)
adjust = MLEN - len;
m->m_data += adjust &~ (sizeof(long)-1);
}
/*
 * Create a writable copy of the mbuf chain.  While doing this
 * we compact the chain with a goal of producing a chain with
 * at most two mbufs.  The second mbuf in this chain is likely
 * to be a cluster.  The primary purpose of this work is to create
 * a writable packet for encryption, compression, etc.  The
 * secondary goal is to linearize the data so the data can be
 * passed to crypto hardware in the most efficient manner possible.
 *
 * Parameters:
 *	m0	head of the mbuf chain to process.  The chain is modified
 *		in place: mbufs may be freed, merged into their
 *		predecessor, or replaced by freshly allocated ones, so
 *		the caller must only use the returned pointer afterwards.
 *	how	allocation flag handed unchanged to MGETHDR, MCLGET and
 *		m_getcl for every allocation made here.
 *
 * Returns the head of the resulting chain (which differs from m0 when
 * the first mbuf had to be replaced), or NULL if an allocation failed,
 * in which case the whole chain has already been released with
 * m_freem().  A NULL m0 is returned as-is since the loop never runs.
 */
struct mbuf *
m_unshare(struct mbuf *m0, int how)
{
struct mbuf *m, *mprev;
struct mbuf *n, *mfirst, *mlast;
int len, off;
/*
 * mprev tracks the last mbuf that is known to stay in the chain.
 * The loop advances via mprev->m_next rather than m->m_next because
 * m itself may have been freed or replaced by the loop body.
 */
mprev = NULL;
for (m = m0; m != NULL; m = mprev->m_next) {
/*
 * Regular mbufs are ignored unless there's a cluster
 * in front of it that we can use to coalesce.  We do
 * the latter mainly so later clusters can be coalesced
 * also w/o having to handle them specially (i.e. convert
 * mbuf+cluster -> cluster).  This optimization is heavily
 * influenced by the assumption that we're running over
 * Ethernet where MCLBYTES is large enough that the max
 * packet size will permit lots of coalescing into a
 * single cluster.  This in turn permits efficient
 * crypto operations, especially when using hardware.
 */
if ((m->m_flags & M_EXT) == 0) {
/*
 * Merge only when the previous mbuf has external storage
 * and M_TRAILINGSPACE reports enough room for all of m's data.
 */
if (mprev && (mprev->m_flags & M_EXT) &&
m->m_len <= M_TRAILINGSPACE(mprev)) {
/* XXX: this ignores mbuf types */
memcpy(mtod(mprev, caddr_t) + mprev->m_len,
mtod(m, caddr_t), m->m_len);
mprev->m_len += m->m_len;
mprev->m_next = m->m_next; /* unlink from chain */
m_free(m); /* reclaim mbuf */
/* Statistics counter is compiled out in this version. */
#if 0
newipsecstat.ips_mbcoalesced++;
#endif
} else {
/* Nothing to merge into; keep this mbuf as it is. */
mprev = m;
}
continue;
}
/*
 * Writable mbufs are left alone (for now).
 */
if (M_WRITABLE(m)) {
mprev = m;
continue;
}
/*
 * Not writable, replace with a copy or coalesce with
 * the previous mbuf if possible (since we have to copy
 * it anyway, we try to reduce the number of mbufs and
 * clusters so that future work is easier).
 */
KASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags));
/* NB: we only coalesce into a cluster or larger */
if (mprev != NULL && (mprev->m_flags & M_EXT) &&
m->m_len <= M_TRAILINGSPACE(mprev)) {
/* XXX: this ignores mbuf types */
memcpy(mtod(mprev, caddr_t) + mprev->m_len,
mtod(m, caddr_t), m->m_len);
mprev->m_len += m->m_len;
mprev->m_next = m->m_next; /* unlink from chain */
m_free(m); /* reclaim mbuf */
/* Statistics counter is compiled out in this version. */
#if 0
newipsecstat.ips_clcoalesced++;
#endif
continue;
}
/*
 * Allocate new space to hold the copy...
 */
/* XXX why can M_PKTHDR be set past the first mbuf? */
if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
/*
 * NB: if a packet header is present we must
 * allocate the mbuf separately from any cluster
 * because M_MOVE_PKTHDR will smash the data
 * pointer and drop the M_EXT marker.
 */
MGETHDR(n, how, m->m_type);
if (n == NULL) {
/* Allocation failed: release the whole chain. */
m_freem(m0);
return (NULL);
}
M_MOVE_PKTHDR(n, m);
MCLGET(n, how);
/* MCLGET reports failure by leaving M_EXT clear on n. */
if ((n->m_flags & M_EXT) == 0) {
m_free(n);
m_freem(m0);
return (NULL);
}
} else {
/* No header to move: get mbuf and cluster in one call. */
n = m_getcl(how, m->m_type, m->m_flags);
if (n == NULL) {
m_freem(m0);
return (NULL);
}
}
/*
 * ... and copy the data.  We deal with jumbo mbufs
 * (i.e. m_len > MCLBYTES) by splitting them into
 * clusters.  We could just malloc a buffer and make
 * it external but too many device drivers don't know
 * how to break up the non-contiguous memory when
 * doing DMA.
 */
len = m->m_len;
off = 0;
/*
 * mfirst/mlast delimit the replacement sub-chain built below;
 * n is always the mbuf currently being filled.
 */
mfirst = n;
mlast = NULL;
for (;;) {
/* Copy at most one cluster's worth per replacement mbuf. */
int cc = min(len, MCLBYTES);
memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
n->m_len = cc;
if (mlast != NULL)
mlast->m_next = n;
mlast = n;
/* Statistics counter is compiled out in this version. */
#if 0
newipsecstat.ips_clcopied++;
#endif
len -= cc;
if (len <= 0)
break;
off += cc;
n = m_getcl(how, m->m_type, m->m_flags);
if (n == NULL) {
/*
 * The partial replacement chain is not linked into
 * m0's chain yet, so both must be freed separately.
 */
m_freem(mfirst);
m_freem(m0);
return (NULL);
}
}
/* Splice the replacement sub-chain in where m used to be. */
n->m_next = m->m_next;
if (mprev == NULL)
m0 = mfirst; /* new head of chain */
else
mprev->m_next = mfirst; /* replace old mbuf */
m_free(m); /* release old mbuf */
/*
 * Resume from the first replacement mbuf; any further
 * replacement mbufs are visited next through its m_next.
 */
mprev = mfirst;
}
return (m0);
}

View File

@ -410,7 +410,6 @@ extern struct mbuf *ipsec_copypkt __P((struct mbuf *));
extern void m_checkalignment(const char* where, struct mbuf *m0,
int off, int len);
extern struct mbuf *m_clone(struct mbuf *m0);
extern struct mbuf *m_makespace(struct mbuf *m0, int skip, int hlen, int *off);
extern caddr_t m_pad(struct mbuf *m, int n);
extern int m_striphdr(struct mbuf *m, int skip, int hlen);

View File

@ -42,155 +42,6 @@
#include <netipsec/ipsec.h>
/*
 * Create a writable copy of the mbuf chain.  While doing this
 * we compact the chain with a goal of producing a chain with
 * at most two mbufs.  The second mbuf in this chain is likely
 * to be a cluster.  The primary purpose of this work is to create
 * a writable packet for encryption, compression, etc.  The
 * secondary goal is to linearize the data so the data can be
 * passed to crypto hardware in the most efficient manner possible.
 *
 * Parameters:
 *	m0	head of the mbuf chain to process; asserted to be
 *		non-NULL.  The chain is modified in place: mbufs may be
 *		freed, merged into their predecessor, or replaced, so
 *		the caller must only use the returned pointer afterwards.
 *
 * Every allocation made here uses M_DONTWAIT.
 *
 * Returns the head of the resulting chain (which differs from m0 when
 * the first mbuf had to be replaced), or NULL if an allocation failed,
 * in which case the whole chain has already been released with
 * m_freem().  Progress is recorded in the newipsecstat counters.
 */
struct mbuf *
m_clone(struct mbuf *m0)
{
struct mbuf *m, *mprev;
struct mbuf *n, *mfirst, *mlast;
int len, off;
IPSEC_ASSERT(m0 != NULL, ("null mbuf"));
/*
 * mprev tracks the last mbuf that is known to stay in the chain.
 * The loop advances via mprev->m_next rather than m->m_next because
 * m itself may have been freed or replaced by the loop body.
 */
mprev = NULL;
for (m = m0; m != NULL; m = mprev->m_next) {
/*
 * Regular mbufs are ignored unless there's a cluster
 * in front of it that we can use to coalesce.  We do
 * the latter mainly so later clusters can be coalesced
 * also w/o having to handle them specially (i.e. convert
 * mbuf+cluster -> cluster).  This optimization is heavily
 * influenced by the assumption that we're running over
 * Ethernet where MCLBYTES is large enough that the max
 * packet size will permit lots of coalescing into a
 * single cluster.  This in turn permits efficient
 * crypto operations, especially when using hardware.
 */
if ((m->m_flags & M_EXT) == 0) {
/*
 * Merge only when the previous mbuf has external storage
 * and M_TRAILINGSPACE reports enough room for all of m's data.
 */
if (mprev && (mprev->m_flags & M_EXT) &&
m->m_len <= M_TRAILINGSPACE(mprev)) {
/* XXX: this ignores mbuf types */
memcpy(mtod(mprev, caddr_t) + mprev->m_len,
mtod(m, caddr_t), m->m_len);
mprev->m_len += m->m_len;
mprev->m_next = m->m_next; /* unlink from chain */
m_free(m); /* reclaim mbuf */
newipsecstat.ips_mbcoalesced++;
} else {
/* Nothing to merge into; keep this mbuf as it is. */
mprev = m;
}
continue;
}
/*
 * Writable mbufs are left alone (for now).
 */
if (M_WRITABLE(m)) {
mprev = m;
continue;
}
/*
 * Not writable, replace with a copy or coalesce with
 * the previous mbuf if possible (since we have to copy
 * it anyway, we try to reduce the number of mbufs and
 * clusters so that future work is easier).
 */
IPSEC_ASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags));
/* NB: we only coalesce into a cluster or larger */
if (mprev != NULL && (mprev->m_flags & M_EXT) &&
m->m_len <= M_TRAILINGSPACE(mprev)) {
/* XXX: this ignores mbuf types */
memcpy(mtod(mprev, caddr_t) + mprev->m_len,
mtod(m, caddr_t), m->m_len);
mprev->m_len += m->m_len;
mprev->m_next = m->m_next; /* unlink from chain */
m_free(m); /* reclaim mbuf */
newipsecstat.ips_clcoalesced++;
continue;
}
/*
 * Allocate new space to hold the copy...
 */
/* XXX why can M_PKTHDR be set past the first mbuf? */
if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
/*
 * NB: if a packet header is present we must
 * allocate the mbuf separately from any cluster
 * because M_MOVE_PKTHDR will smash the data
 * pointer and drop the M_EXT marker.
 */
MGETHDR(n, M_DONTWAIT, m->m_type);
if (n == NULL) {
/* Allocation failed: release the whole chain. */
m_freem(m0);
return (NULL);
}
M_MOVE_PKTHDR(n, m);
MCLGET(n, M_DONTWAIT);
/* MCLGET reports failure by leaving M_EXT clear on n. */
if ((n->m_flags & M_EXT) == 0) {
m_free(n);
m_freem(m0);
return (NULL);
}
} else {
/* No header to move: get mbuf and cluster in one call. */
n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
if (n == NULL) {
m_freem(m0);
return (NULL);
}
}
/*
 * ... and copy the data.  We deal with jumbo mbufs
 * (i.e. m_len > MCLBYTES) by splitting them into
 * clusters.  We could just malloc a buffer and make
 * it external but too many device drivers don't know
 * how to break up the non-contiguous memory when
 * doing DMA.
 */
len = m->m_len;
off = 0;
/*
 * mfirst/mlast delimit the replacement sub-chain built below;
 * n is always the mbuf currently being filled.
 */
mfirst = n;
mlast = NULL;
for (;;) {
/* Copy at most one cluster's worth per replacement mbuf. */
int cc = min(len, MCLBYTES);
memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
n->m_len = cc;
if (mlast != NULL)
mlast->m_next = n;
mlast = n;
newipsecstat.ips_clcopied++;
len -= cc;
if (len <= 0)
break;
off += cc;
n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
if (n == NULL) {
/*
 * The partial replacement chain is not linked into
 * m0's chain yet, so both must be freed separately.
 */
m_freem(mfirst);
m_freem(m0);
return (NULL);
}
}
/* Splice the replacement sub-chain in where m used to be. */
n->m_next = m->m_next;
if (mprev == NULL)
m0 = mfirst; /* new head of chain */
else
mprev->m_next = mfirst; /* replace old mbuf */
m_free(m); /* release old mbuf */
/*
 * Resume from the first replacement mbuf; any further
 * replacement mbufs are visited next through its m_next.
 */
mprev = mfirst;
}
return (m0);
}
/*
* Make space for a new header of length hlen at skip bytes
* into the packet. When doing this we allocate new mbufs only

View File

@ -942,7 +942,7 @@ ah_output(
/* Update the counters. */
ahstat.ahs_obytes += m->m_pkthdr.len - skip;
m = m_clone(m);
m = m_unshare(m, M_NOWAIT);
if (m == NULL) {
DPRINTF(("%s: cannot clone mbuf chain, SA %s/%08lx\n", __func__,
ipsec_address(&sav->sah->saidx.dst),

View File

@ -713,7 +713,7 @@ esp_output(
/* Update the counters. */
espstat.esps_obytes += m->m_pkthdr.len - skip;
m = m_clone(m);
m = m_unshare(m, M_NOWAIT);
if (m == NULL) {
DPRINTF(("%s: cannot clone mbuf chain, SA %s/%08lx\n", __func__,
ipsec_address(&saidx->dst), (u_long) ntohl(sav->spi)));

View File

@ -385,7 +385,7 @@ ipcomp_output(
/* Update the counters */
ipcompstat.ipcomps_obytes += m->m_pkthdr.len - skip;
m = m_clone(m);
m = m_unshare(m, M_NOWAIT);
if (m == NULL) {
ipcompstat.ipcomps_hdrops++;
DPRINTF(("%s: cannot clone mbuf chain, IPCA %s/%08lx\n",

View File

@ -677,6 +677,7 @@ struct mbuf *m_pullup(struct mbuf *, int);
int m_sanity(struct mbuf *, int);
struct mbuf *m_split(struct mbuf *, int, int);
struct mbuf *m_uiotombuf(struct uio *, int, int, int);
struct mbuf *m_unshare(struct mbuf *, int how);
/*-
* Network packets may have annotations attached by affixing a list