promote fast ipsec's m_clone routine for public use; it is renamed
m_unshare and the caller can now control how mbufs are allocated

Reviewed by:	andre, luigi, mlaier
MFC after:	1 week
parent	b4c31113a1
commit	47e2996e8b

@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd November 18, 2005
+.Dd March 15, 2006
 .Dt MBUF 9
 .Os
 .\"

@@ -132,6 +132,8 @@
 .Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off"
 .Ft struct mbuf *
 .Fn m_defrag "struct mbuf *m0" "int how"
+.Ft struct mbuf *
+.Fn m_unshare "struct mbuf *m0" "int how"
 .\"
 .Sh DESCRIPTION
 An

@@ -886,6 +888,26 @@ depending on the caller's preference.
 This function is especially useful in network drivers, where
 certain long mbuf chains must be shortened before being added
 to TX descriptor lists.
+.It Fn m_unshare m0 how
+Create a version of the specified mbuf chain whose
+contents can be safely modified without affecting other users.
+If allocation fails and this operation can not be completed,
+.Dv NULL
+will be returned.
+The original mbuf chain is always reclaimed and the reference
+count of any shared mbuf clusters is decremented.
+.Fa how
+should be either
+.Dv M_TRYWAIT
+or
+.Dv M_DONTWAIT ,
+depending on the caller's preference.
+As a side-effect of this process the returned
+mbuf chain may be compacted.
+.Pp
+This function is especially useful in the transmit path of
+network code, when data must be encrypted or otherwise
+altered prior to transmission.
 .El
 .Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
 This section currently applies to TCP/IP only.

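The failure contract above is easy to misuse: m_unshare consumes its
argument even when it fails, so the old pointer must never be touched
after the call. A minimal sketch of the transmit-path pattern the text
describes (the function name and the ENOBUFS return are illustrative,
not part of the commit):

	static int
	example_encrypt_and_send(struct mbuf *m)
	{
		/*
		 * Get a chain we are allowed to scribble on. M_DONTWAIT
		 * keeps this usable where sleeping is not allowed; on
		 * failure the original chain has already been freed.
		 */
		m = m_unshare(m, M_DONTWAIT);
		if (m == NULL)
			return (ENOBUFS);
		/* ... modify m in place and hand it to the driver ... */
		return (0);
	}
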
@@ -1679,3 +1679,156 @@ m_align(struct mbuf *m, int len)
 	adjust = MLEN - len;
 	m->m_data += adjust &~ (sizeof(long)-1);
 }
+
+/*
+ * Create a writable copy of the mbuf chain. While doing this
+ * we compact the chain with a goal of producing a chain with
+ * at most two mbufs. The second mbuf in this chain is likely
+ * to be a cluster. The primary purpose of this work is to create
+ * a writable packet for encryption, compression, etc. The
+ * secondary goal is to linearize the data so the data can be
+ * passed to crypto hardware in the most efficient manner possible.
+ */
+struct mbuf *
+m_unshare(struct mbuf *m0, int how)
+{
+	struct mbuf *m, *mprev;
+	struct mbuf *n, *mfirst, *mlast;
+	int len, off;
+
+	mprev = NULL;
+	for (m = m0; m != NULL; m = mprev->m_next) {
+		/*
+		 * Regular mbufs are ignored unless there's a cluster
+		 * in front of it that we can use to coalesce. We do
+		 * the latter mainly so later clusters can be coalesced
+		 * also w/o having to handle them specially (i.e. convert
+		 * mbuf+cluster -> cluster). This optimization is heavily
+		 * influenced by the assumption that we're running over
+		 * Ethernet where MCLBYTES is large enough that the max
+		 * packet size will permit lots of coalescing into a
+		 * single cluster. This in turn permits efficient
+		 * crypto operations, especially when using hardware.
+		 */
+		if ((m->m_flags & M_EXT) == 0) {
+			if (mprev && (mprev->m_flags & M_EXT) &&
+			    m->m_len <= M_TRAILINGSPACE(mprev)) {
+				/* XXX: this ignores mbuf types */
+				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
+				    mtod(m, caddr_t), m->m_len);
+				mprev->m_len += m->m_len;
+				mprev->m_next = m->m_next;	/* unlink from chain */
+				m_free(m);			/* reclaim mbuf */
+#if 0
+				newipsecstat.ips_mbcoalesced++;
+#endif
+			} else {
+				mprev = m;
+			}
+			continue;
+		}
+		/*
+		 * Writable mbufs are left alone (for now).
+		 */
+		if (M_WRITABLE(m)) {
+			mprev = m;
+			continue;
+		}
+
+		/*
+		 * Not writable, replace with a copy or coalesce with
+		 * the previous mbuf if possible (since we have to copy
+		 * it anyway, we try to reduce the number of mbufs and
+		 * clusters so that future work is easier).
+		 */
+		KASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags));
+		/* NB: we only coalesce into a cluster or larger */
+		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
+		    m->m_len <= M_TRAILINGSPACE(mprev)) {
+			/* XXX: this ignores mbuf types */
+			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
+			    mtod(m, caddr_t), m->m_len);
+			mprev->m_len += m->m_len;
+			mprev->m_next = m->m_next;	/* unlink from chain */
+			m_free(m);			/* reclaim mbuf */
+#if 0
+			newipsecstat.ips_clcoalesced++;
+#endif
+			continue;
+		}
+
+		/*
+		 * Allocate new space to hold the copy...
+		 */
+		/* XXX why can M_PKTHDR be set past the first mbuf? */
+		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
+			/*
+			 * NB: if a packet header is present we must
+			 * allocate the mbuf separately from any cluster
+			 * because M_MOVE_PKTHDR will smash the data
+			 * pointer and drop the M_EXT marker.
+			 */
+			MGETHDR(n, how, m->m_type);
+			if (n == NULL) {
+				m_freem(m0);
+				return (NULL);
+			}
+			M_MOVE_PKTHDR(n, m);
+			MCLGET(n, how);
+			if ((n->m_flags & M_EXT) == 0) {
+				m_free(n);
+				m_freem(m0);
+				return (NULL);
+			}
+		} else {
+			n = m_getcl(how, m->m_type, m->m_flags);
+			if (n == NULL) {
+				m_freem(m0);
+				return (NULL);
+			}
+		}
+		/*
+		 * ... and copy the data. We deal with jumbo mbufs
+		 * (i.e. m_len > MCLBYTES) by splitting them into
+		 * clusters. We could just malloc a buffer and make
+		 * it external but too many device drivers don't know
+		 * how to break up the non-contiguous memory when
+		 * doing DMA.
+		 */
+		len = m->m_len;
+		off = 0;
+		mfirst = n;
+		mlast = NULL;
+		for (;;) {
+			int cc = min(len, MCLBYTES);
+			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
+			n->m_len = cc;
+			if (mlast != NULL)
+				mlast->m_next = n;
+			mlast = n;
+#if 0
+			newipsecstat.ips_clcopied++;
+#endif
+
+			len -= cc;
+			if (len <= 0)
+				break;
+			off += cc;
+
+			n = m_getcl(how, m->m_type, m->m_flags);
+			if (n == NULL) {
+				m_freem(mfirst);
+				m_freem(m0);
+				return (NULL);
+			}
+		}
+		n->m_next = m->m_next;
+		if (mprev == NULL)
+			m0 = mfirst;		/* new head of chain */
+		else
+			mprev->m_next = mfirst;	/* replace old mbuf */
+		m_free(m);			/* release old mbuf */
+		mprev = mfirst;
+	}
+	return (m0);
+}

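The function's contract is easiest to see against m_copypacket(), which
produces a chain that shares its clusters with the original. A
hypothetical sketch (the helper name is illustrative and not part of
the commit):

	static struct mbuf *
	writable_alias(struct mbuf *orig)
	{
		struct mbuf *ro, *w;

		/* ro shares orig's clusters, so its data is read-only */
		ro = m_copypacket(orig, M_DONTWAIT);
		if (ro == NULL)
			return (NULL);
		/*
		 * Replace every shared cluster with a private copy,
		 * coalescing along the way. ro is consumed by the call;
		 * only the return value may be used afterwards.
		 */
		w = m_unshare(ro, M_DONTWAIT);
		if (w != NULL)
			KASSERT(M_WRITABLE(w),
			    ("unshared chain head not writable"));
		return (w);
	}

Note the #if 0 blocks: they mark the points where the private m_clone
bumped fast-ipsec counters, which a general-purpose mbuf routine
presumably cannot reference, so the statements are kept but disabled.
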
@@ -410,7 +410,6 @@ extern struct mbuf *ipsec_copypkt __P((struct mbuf *));
 
 extern void m_checkalignment(const char* where, struct mbuf *m0,
 		int off, int len);
-extern struct mbuf *m_clone(struct mbuf *m0);
 extern struct mbuf *m_makespace(struct mbuf *m0, int skip, int hlen, int *off);
 extern caddr_t m_pad(struct mbuf *m, int n);
 extern int m_striphdr(struct mbuf *m, int skip, int hlen);

@@ -42,155 +42,6 @@
 
 #include <netipsec/ipsec.h>
 
-/*
- * Create a writable copy of the mbuf chain. While doing this
- * we compact the chain with a goal of producing a chain with
- * at most two mbufs. The second mbuf in this chain is likely
- * to be a cluster. The primary purpose of this work is to create
- * a writable packet for encryption, compression, etc. The
- * secondary goal is to linearize the data so the data can be
- * passed to crypto hardware in the most efficient manner possible.
- */
-struct mbuf *
-m_clone(struct mbuf *m0)
-{
-	struct mbuf *m, *mprev;
-	struct mbuf *n, *mfirst, *mlast;
-	int len, off;
-
-	IPSEC_ASSERT(m0 != NULL, ("null mbuf"));
-
-	mprev = NULL;
-	for (m = m0; m != NULL; m = mprev->m_next) {
-		/*
-		 * Regular mbufs are ignored unless there's a cluster
-		 * in front of it that we can use to coalesce. We do
-		 * the latter mainly so later clusters can be coalesced
-		 * also w/o having to handle them specially (i.e. convert
-		 * mbuf+cluster -> cluster). This optimization is heavily
-		 * influenced by the assumption that we're running over
-		 * Ethernet where MCLBYTES is large enough that the max
-		 * packet size will permit lots of coalescing into a
-		 * single cluster. This in turn permits efficient
-		 * crypto operations, especially when using hardware.
-		 */
-		if ((m->m_flags & M_EXT) == 0) {
-			if (mprev && (mprev->m_flags & M_EXT) &&
-			    m->m_len <= M_TRAILINGSPACE(mprev)) {
-				/* XXX: this ignores mbuf types */
-				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
-				    mtod(m, caddr_t), m->m_len);
-				mprev->m_len += m->m_len;
-				mprev->m_next = m->m_next;	/* unlink from chain */
-				m_free(m);			/* reclaim mbuf */
-				newipsecstat.ips_mbcoalesced++;
-			} else {
-				mprev = m;
-			}
-			continue;
-		}
-		/*
-		 * Writable mbufs are left alone (for now).
-		 */
-		if (M_WRITABLE(m)) {
-			mprev = m;
-			continue;
-		}
-
-		/*
-		 * Not writable, replace with a copy or coalesce with
-		 * the previous mbuf if possible (since we have to copy
-		 * it anyway, we try to reduce the number of mbufs and
-		 * clusters so that future work is easier).
-		 */
-		IPSEC_ASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags));
-		/* NB: we only coalesce into a cluster or larger */
-		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
-		    m->m_len <= M_TRAILINGSPACE(mprev)) {
-			/* XXX: this ignores mbuf types */
-			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
-			    mtod(m, caddr_t), m->m_len);
-			mprev->m_len += m->m_len;
-			mprev->m_next = m->m_next;	/* unlink from chain */
-			m_free(m);			/* reclaim mbuf */
-			newipsecstat.ips_clcoalesced++;
-			continue;
-		}
-
-		/*
-		 * Allocate new space to hold the copy...
-		 */
-		/* XXX why can M_PKTHDR be set past the first mbuf? */
-		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
-			/*
-			 * NB: if a packet header is present we must
-			 * allocate the mbuf separately from any cluster
-			 * because M_MOVE_PKTHDR will smash the data
-			 * pointer and drop the M_EXT marker.
-			 */
-			MGETHDR(n, M_DONTWAIT, m->m_type);
-			if (n == NULL) {
-				m_freem(m0);
-				return (NULL);
-			}
-			M_MOVE_PKTHDR(n, m);
-			MCLGET(n, M_DONTWAIT);
-			if ((n->m_flags & M_EXT) == 0) {
-				m_free(n);
-				m_freem(m0);
-				return (NULL);
-			}
-		} else {
-			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
-			if (n == NULL) {
-				m_freem(m0);
-				return (NULL);
-			}
-		}
-		/*
-		 * ... and copy the data. We deal with jumbo mbufs
-		 * (i.e. m_len > MCLBYTES) by splitting them into
-		 * clusters. We could just malloc a buffer and make
-		 * it external but too many device drivers don't know
-		 * how to break up the non-contiguous memory when
-		 * doing DMA.
-		 */
-		len = m->m_len;
-		off = 0;
-		mfirst = n;
-		mlast = NULL;
-		for (;;) {
-			int cc = min(len, MCLBYTES);
-			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
-			n->m_len = cc;
-			if (mlast != NULL)
-				mlast->m_next = n;
-			mlast = n;
-			newipsecstat.ips_clcopied++;
-
-			len -= cc;
-			if (len <= 0)
-				break;
-			off += cc;
-
-			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
-			if (n == NULL) {
-				m_freem(mfirst);
-				m_freem(m0);
-				return (NULL);
-			}
-		}
-		n->m_next = m->m_next;
-		if (mprev == NULL)
-			m0 = mfirst;		/* new head of chain */
-		else
-			mprev->m_next = mfirst;	/* replace old mbuf */
-		m_free(m);			/* release old mbuf */
-		mprev = mfirst;
-	}
-	return (m0);
-}
-
 /*
  * Make space for a new header of length hlen at skip bytes
  * into the packet. When doing this we allocate new mbufs only

@@ -942,7 +942,7 @@ ah_output(
 	/* Update the counters. */
 	ahstat.ahs_obytes += m->m_pkthdr.len - skip;
 
-	m = m_clone(m);
+	m = m_unshare(m, M_NOWAIT);
 	if (m == NULL) {
 		DPRINTF(("%s: cannot clone mbuf chain, SA %s/%08lx\n", __func__,
 		    ipsec_address(&sav->sah->saidx.dst),

@@ -713,7 +713,7 @@ esp_output(
 	/* Update the counters. */
 	espstat.esps_obytes += m->m_pkthdr.len - skip;
 
-	m = m_clone(m);
+	m = m_unshare(m, M_NOWAIT);
 	if (m == NULL) {
 		DPRINTF(("%s: cannot clone mbuf chain, SA %s/%08lx\n", __func__,
 		    ipsec_address(&saidx->dst), (u_long) ntohl(sav->spi)));

@@ -385,7 +385,7 @@ ipcomp_output(
 	/* Update the counters */
 	ipcompstat.ipcomps_obytes += m->m_pkthdr.len - skip;
 
-	m = m_clone(m);
+	m = m_unshare(m, M_NOWAIT);
 	if (m == NULL) {
 		ipcompstat.ipcomps_hdrops++;
 		DPRINTF(("%s: cannot clone mbuf chain, IPCA %s/%08lx\n",

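All three transform call sites make the same mechanical conversion,
with M_NOWAIT preserving m_clone's old non-sleeping behavior. The
pattern, restated once (the ENOBUFS return stands in for the callers'
real error paths, which bump their per-protocol drop counters):

	m = m_unshare(m, M_NOWAIT);	/* was: m = m_clone(m); */
	if (m == NULL) {
		/*
		 * Nothing to free here: m_unshare reclaimed the
		 * original chain before returning NULL.
		 */
		return (ENOBUFS);
	}
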
@@ -677,6 +677,7 @@ struct mbuf *m_pullup(struct mbuf *, int);
 int m_sanity(struct mbuf *, int);
 struct mbuf *m_split(struct mbuf *, int, int);
 struct mbuf *m_uiotombuf(struct uio *, int, int, int);
+struct mbuf *m_unshare(struct mbuf *, int how);
 
 /*-
  * Network packets may have annotations attached by affixing a list