Back out the ip_fragment() code -- it is not urgent to have it in now,

I will put it back in in a better form after 5.0 is out.

Requested by: sam, rwatson, luigi (on second thought)
Approved by: re
This commit is contained in:
Luigi Rizzo 2002-11-20 18:56:25 +00:00
parent 82e5a9a354
commit b375c9ec2c
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=107112
2 changed files with 189 additions and 216 deletions

View File

@ -116,10 +116,15 @@ extern struct protosw inetsw[];
* The mbuf opt, if present, will not be freed.
*/
int
ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro,
int flags, struct ip_moptions *imo, struct inpcb *inp)
ip_output(m0, opt, ro, flags, imo, inp)
struct mbuf *m0;
struct mbuf *opt;
struct route *ro;
int flags;
struct ip_moptions *imo;
struct inpcb *inp;
{
struct ip *ip;
struct ip *ip, *mhip;
struct ifnet *ifp = NULL; /* keep compiler happy */
struct mbuf *m;
int hlen = sizeof (struct ip);
@ -453,7 +458,7 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro,
goto bad;
}
/* don't allow broadcast messages to be fragmented */
if (ip->ip_len > ifp->if_mtu) {
if ((u_short)ip->ip_len > ifp->if_mtu) {
error = EMSGSIZE;
goto bad;
}
@ -980,7 +985,8 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro,
* If small enough for interface, or the interface will take
* care of the fragmentation for us, can just send directly.
*/
if (ip->ip_len <= ifp->if_mtu || ifp->if_hwassist & CSUM_FRAGMENT) {
if ((u_short)ip->ip_len <= ifp->if_mtu ||
ifp->if_hwassist & CSUM_FRAGMENT) {
ip->ip_len = htons(ip->ip_len);
ip->ip_off = htons(ip->ip_off);
ip->ip_sum = 0;
@ -1002,6 +1008,10 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro,
(struct sockaddr *)dst, ro->ro_rt);
goto done;
}
/*
* Too large for interface; fragment if possible.
* Must be able to put at least 8 bytes per fragment.
*/
if (ip->ip_off & IP_DF) {
error = EMSGSIZE;
/*
@ -1019,16 +1029,143 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro,
ipstat.ips_cantfrag++;
goto bad;
}
/*
* Too large for interface; fragment if possible. If successful,
* on return m will point to a list of packets to be sent.
*/
error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum);
if (error)
len = (ifp->if_mtu - hlen) &~ 7;
if (len < 8) {
error = EMSGSIZE;
goto bad;
for (; m; m = m0) {
}
/*
* if the interface will not calculate checksums on
* fragmented packets, then do it here.
*/
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
(ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
in_delayed_cksum(m);
m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
}
if (len > PAGE_SIZE) {
/*
* Fragment large datagrams such that each segment
* contains a multiple of PAGE_SIZE amount of data,
* plus headers. This enables a receiver to perform
* page-flipping zero-copy optimizations.
*/
int newlen;
struct mbuf *mtmp;
for (mtmp = m, off = 0;
mtmp && ((off + mtmp->m_len) <= ifp->if_mtu);
mtmp = mtmp->m_next) {
off += mtmp->m_len;
}
/*
* firstlen (off - hlen) must be aligned on an
* 8-byte boundary
*/
if (off < hlen)
goto smart_frag_failure;
off = ((off - hlen) & ~7) + hlen;
newlen = (~PAGE_MASK) & ifp->if_mtu;
if ((newlen + sizeof (struct ip)) > ifp->if_mtu) {
/* we failed, go back the default */
smart_frag_failure:
newlen = len;
off = hlen + len;
}
/* printf("ipfrag: len = %d, hlen = %d, mhlen = %d, newlen = %d, off = %d\n",
len, hlen, sizeof (struct ip), newlen, off);*/
len = newlen;
} else {
off = hlen + len;
}
{
int mhlen, firstlen = off - hlen;
struct mbuf **mnext = &m->m_nextpkt;
int nfrags = 1;
/*
* Loop through length of segment after first fragment,
* make new header and copy data of each part and link onto chain.
*/
m0 = m;
mhlen = sizeof (struct ip);
for (; off < (u_short)ip->ip_len; off += len) {
MGETHDR(m, M_DONTWAIT, MT_HEADER);
if (m == 0) {
error = ENOBUFS;
ipstat.ips_odropped++;
goto sendorfree;
}
m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
m->m_data += max_linkhdr;
mhip = mtod(m, struct ip *);
*mhip = *ip;
if (hlen > sizeof (struct ip)) {
mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
mhip->ip_v = IPVERSION;
mhip->ip_hl = mhlen >> 2;
}
m->m_len = mhlen;
mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
if (off + len >= (u_short)ip->ip_len)
len = (u_short)ip->ip_len - off;
else
mhip->ip_off |= IP_MF;
mhip->ip_len = htons((u_short)(len + mhlen));
m->m_next = m_copy(m0, off, len);
if (m->m_next == 0) {
(void) m_free(m);
error = ENOBUFS; /* ??? */
ipstat.ips_odropped++;
goto sendorfree;
}
m->m_pkthdr.len = mhlen + len;
m->m_pkthdr.rcvif = (struct ifnet *)0;
#ifdef MAC
mac_create_fragment(m0, m);
#endif
m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
mhip->ip_off = htons(mhip->ip_off);
mhip->ip_sum = 0;
if (sw_csum & CSUM_DELAY_IP)
mhip->ip_sum = in_cksum(m, mhlen);
*mnext = m;
mnext = &m->m_nextpkt;
nfrags++;
}
ipstat.ips_ofragments += nfrags;
/* set first/last markers for fragment chain */
m->m_flags |= M_LASTFRAG;
m0->m_flags |= M_FIRSTFRAG | M_FRAG;
m0->m_pkthdr.csum_data = nfrags;
/*
* Update first fragment by trimming what's been copied out
* and updating header, then send each fragment (in order).
*/
m = m0;
m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
m->m_pkthdr.len = hlen + firstlen;
ip->ip_len = htons((u_short)m->m_pkthdr.len);
ip->ip_off |= IP_MF;
ip->ip_off = htons(ip->ip_off);
ip->ip_sum = 0;
if (sw_csum & CSUM_DELAY_IP)
ip->ip_sum = in_cksum(m, hlen);
sendorfree:
for (m = m0; m; m = m0) {
m0 = m->m_nextpkt;
m->m_nextpkt = NULL;
m->m_nextpkt = 0;
#ifdef IPSEC
/* clean ipsec history once it goes out of the node */
ipsec_delaux(m);
@ -1048,7 +1185,7 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro,
if (error == 0)
ipstat.ips_fragmented++;
}
done:
#ifdef IPSEC
if (ro == &iproute && ro->ro_rt) {
@ -1075,187 +1212,6 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro,
goto done;
}
/*
* Create a chain of fragments which fit the given mtu. m_frag points to the
* mbuf to be fragmented; on return it points to the chain with the fragments.
* The original packet is reused as the first fragment and the remaining
* fragments are linked off it through m_nextpkt.
*
* ip is the IP header of the packet to fragment. ip_len and ip_off are
* compared and combined as plain integers here and are passed through
* htons() just before a successful return, so they are presumably in host
* byte order on entry -- confirm against the caller.
* mtu is the size limit for each fragment (header plus payload).
* if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
* sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP).
*
* Return 0 if no error. Otherwise:
*   EMSGSIZE if IP_DF is set in ip_off (ips_cantfrag is also incremented),
*            or if fewer than 8 payload bytes would fit in a fragment;
*   ENOBUFS  if a fragment header mbuf or a payload copy cannot be
*            allocated (ips_odropped is also incremented).
*
* NOTE(review): packet ownership on failure is not uniform. The two
* EMSGSIZE returns leave the packet untouched, whereas the ENOBUFS paths go
* through "done", which frees the original packet and every fragment built
* so far while *m_frag still holds the original pointer. Verify that no
* caller frees *m_frag again after an ENOBUFS return.
*/
int
ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
u_long if_hwassist_flags, int sw_csum)
{
int error = 0;
/* IP header length in bytes, derived from the ip_hl field. */
int hlen = ip->ip_hl << 2;
/* Room left after the header, rounded down to a multiple of 8. */
int len = (mtu - hlen) &~ 7; /* size of payload in each fragment */
/* Byte offset into the original packet (header included). */
int off;
struct mbuf *m0 = *m_frag; /* the original packet */
/* Payload bytes that stay in the first fragment. */
int firstlen;
/* Where to store the pointer to the next fragment created. */
struct mbuf **mnext;
/* Fragment count, including the first fragment. */
int nfrags;
if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */
ipstat.ips_cantfrag++;
return EMSGSIZE;
}
/*
* Must be able to put at least 8 bytes per fragment.
*/
if (len < 8)
return EMSGSIZE;
/*
* if the interface will not calculate checksums on
* fragmented packets, then do it here.
*/
if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
(if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
in_delayed_cksum(m0);
m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
}
if (len > PAGE_SIZE) {
/*
* Fragment large datagrams such that each segment
* contains a multiple of PAGE_SIZE amount of data,
* plus headers. This enables a receiver to perform
* page-flipping zero-copy optimizations.
*
* XXX When does this help given that sender and receiver
* could have different page sizes, and also mtu could
* be less than the receiver's page size ?
*/
int newlen;
struct mbuf *m;
/*
* Sum the lengths of the leading mbufs for as long as the running
* total stays within mtu; off ends up as the size of the first
* fragment (header included) on an mbuf boundary.
*/
for (m = m0, off = 0; m && ((off + m->m_len) <= mtu);
m = m->m_next)
off += m->m_len;
/*
* firstlen (off - hlen) must be aligned on an
* 8-byte boundary
*/
if (off < hlen)
goto smart_frag_failure;
off = ((off - hlen) & ~7) + hlen;
/*
* Payload size for the later fragments. Presumably this rounds mtu
* down to a multiple of PAGE_SIZE (assumes PAGE_MASK is
* PAGE_SIZE - 1 -- TODO confirm).
*/
newlen = (~PAGE_MASK) & mtu;
if ((newlen + sizeof (struct ip)) > mtu) {
/* we failed, go back the default */
/* Also entered directly by the goto above when off < hlen. */
smart_frag_failure:
newlen = len;
off = hlen + len;
}
len = newlen;
} else {
/* Default layout: every fragment carries len payload bytes. */
off = hlen + len;
}
firstlen = off - hlen;
mnext = &m0->m_nextpkt; /* pointer to next packet */
/*
* Loop through length of segment after first fragment,
* make new header and copy data of each part and link onto chain.
* Here, m0 is the original packet, m is the fragment being created.
* The fragments are linked off the m_nextpkt of the original
* packet, which after processing serves as the first fragment.
*/
for (nfrags=1; off < ip->ip_len; off += len, nfrags++) {
struct ip *mhip; /* ip header on the fragment */
struct mbuf *m;
/* Header length of this fragment; grows below if options are copied. */
int mhlen = sizeof (struct ip);
MGETHDR(m, M_DONTWAIT, MT_HEADER);
if (m == 0) {
error = ENOBUFS;
ipstat.ips_odropped++;
goto done;
}
/* Carry over the multicast flag and mark this mbuf as a fragment. */
m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
/*
* In the first mbuf, leave room for the link header, then
* copy the original IP header including options. The payload
* goes into an additional mbuf chain returned by m_copy().
*/
m->m_data += max_linkhdr;
mhip = mtod(m, struct ip *);
*mhip = *ip;
/* The original header is longer than a bare struct ip: copy options. */
if (hlen > sizeof (struct ip)) {
/* ip_optcopy()'s return value is used as the copied options length. */
mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
mhip->ip_v = IPVERSION;
mhip->ip_hl = mhlen >> 2;
}
m->m_len = mhlen;
/* XXX do we need to add ip->ip_off below ? */
/* Fragment offset is expressed in 8-byte units of payload. */
mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
if (off + len >= ip->ip_len) { /* last fragment */
/* Shrink len to what is left; this also ends the loop. */
len = ip->ip_len - off;
m->m_flags |= M_LASTFRAG;
} else
mhip->ip_off |= IP_MF;
mhip->ip_len = htons((u_short)(len + mhlen));
m->m_next = m_copy(m0, off, len);
if (m->m_next == 0) { /* copy failed */
/* m is not linked into the chain yet, so release it here. */
m_free(m);
error = ENOBUFS; /* ??? */
ipstat.ips_odropped++;
goto done;
}
m->m_pkthdr.len = mhlen + len;
m->m_pkthdr.rcvif = (struct ifnet *)0;
#ifdef MAC
mac_create_fragment(m0, m);
#endif
m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
mhip->ip_off = htons(mhip->ip_off);
/* Zero the checksum field, then fill it in only if sw_csum asks for it. */
mhip->ip_sum = 0;
if (sw_csum & CSUM_DELAY_IP)
mhip->ip_sum = in_cksum(m, mhlen);
/* Append the finished fragment to the m_nextpkt chain. */
*mnext = m;
mnext = &m->m_nextpkt;
}
ipstat.ips_ofragments += nfrags;
/* set first markers for fragment chain */
m0->m_flags |= M_FIRSTFRAG | M_FRAG;
/* The fragment count is stashed in the first packet's csum_data. */
m0->m_pkthdr.csum_data = nfrags;
/*
* Update first fragment by trimming what has been copied out
* and updating header.
*/
/*
* The count passed to m_adj() is negative here; presumably a negative
* count trims from the tail of the chain -- confirm against m_adj().
*/
m_adj(m0, hlen + firstlen - ip->ip_len);
m0->m_pkthdr.len = hlen + firstlen;
ip->ip_len = htons((u_short)m0->m_pkthdr.len);
ip->ip_off |= IP_MF;
ip->ip_off = htons(ip->ip_off);
ip->ip_sum = 0;
if (sw_csum & CSUM_DELAY_IP)
ip->ip_sum = in_cksum(m0, hlen);
/* m0 was loaded from *m_frag above, so this stores the same pointer back. */
*m_frag = m0;
done:
/*
* On failure, release the original packet and every fragment already
* linked through m_nextpkt. *m_frag is not cleared.
* NOTE(review): the early EMSGSIZE returns above do not free anything,
* so the caller cannot treat all error returns alike -- confirm.
*/
if (error) {
struct mbuf *m;
for (m = m0; m; m = m0) {
/* Detach each packet from the chain before freeing it. */
m0 = m->m_nextpkt;
m->m_nextpkt = 0;
#ifdef IPSEC
/* clean ipsec history */
ipsec_delaux(m);
#endif
m_freem(m);
}
}
return error;
}
void
in_delayed_cksum(struct mbuf *m)
{
@ -1291,15 +1247,18 @@ in_delayed_cksum(struct mbuf *m)
* XXX This routine assumes that the packet has no options in place.
*/
static struct mbuf *
ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
ip_insertoptions(m, opt, phlen)
register struct mbuf *m;
struct mbuf *opt;
int *phlen;
{
struct ipoption *p = mtod(opt, struct ipoption *);
register struct ipoption *p = mtod(opt, struct ipoption *);
struct mbuf *n;
struct ip *ip = mtod(m, struct ip *);
register struct ip *ip = mtod(m, struct ip *);
unsigned optlen;
optlen = opt->m_len - sizeof(p->ipopt_dst);
if (optlen + ip->ip_len > IP_MAXPACKET) {
if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) {
*phlen = 0;
return (m); /* XXX should fail */
}
@ -1343,9 +1302,10 @@ ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
* omitting those not copied during fragmentation.
*/
int
ip_optcopy(struct ip *ip, struct ip *jp)
ip_optcopy(ip, jp)
struct ip *ip, *jp;
{
u_char *cp, *dp;
register u_char *cp, *dp;
int opt, optlen, cnt;
cp = (u_char *)(ip + 1);
@ -1385,7 +1345,9 @@ ip_optcopy(struct ip *ip, struct ip *jp)
* IP socket option processing.
*/
int
ip_ctloutput(struct socket *so, struct sockopt *sopt)
ip_ctloutput(so, sopt)
struct socket *so;
struct sockopt *sopt;
{
struct inpcb *inp = sotoinpcb(so);
int error, optval;
@ -1646,10 +1608,13 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
* with destination address if source routed.
*/
static int
ip_pcbopts(int optname, struct mbuf **pcbopt, struct mbuf *m)
ip_pcbopts(optname, pcbopt, m)
int optname;
struct mbuf **pcbopt;
register struct mbuf *m;
{
int cnt, optlen;
u_char *cp;
register int cnt, optlen;
register u_char *cp;
u_char opt;
/* turn off any old options */
@ -1752,7 +1717,9 @@ ip_pcbopts(int optname, struct mbuf **pcbopt, struct mbuf *m)
* following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
*/
static struct ifnet *
ip_multicast_if(struct in_addr *a, int *ifindexp)
ip_multicast_if(a, ifindexp)
struct in_addr *a;
int *ifindexp;
{
int ifindex;
struct ifnet *ifp;
@ -1776,7 +1743,9 @@ ip_multicast_if(struct in_addr *a, int *ifindexp)
* Set the IP multicast options in response to user setsockopt().
*/
static int
ip_setmoptions(struct sockopt *sopt, struct ip_moptions **imop)
ip_setmoptions(sopt, imop)
struct sockopt *sopt;
struct ip_moptions **imop;
{
int error = 0;
int i;
@ -2072,7 +2041,9 @@ ip_setmoptions(struct sockopt *sopt, struct ip_moptions **imop)
* Return the IP multicast options in response to user getsockopt().
*/
static int
ip_getmoptions(struct sockopt *sopt, struct ip_moptions *imo)
ip_getmoptions(sopt, imo)
struct sockopt *sopt;
register struct ip_moptions *imo;
{
struct in_addr addr;
struct in_ifaddr *ia;
@ -2136,9 +2107,10 @@ ip_getmoptions(struct sockopt *sopt, struct ip_moptions *imo)
* Discard the IP multicast options.
*/
void
ip_freemoptions(struct ip_moptions *imo)
ip_freemoptions(imo)
register struct ip_moptions *imo;
{
int i;
register int i;
if (imo != NULL) {
for (i = 0; i < imo->imo_num_memberships; ++i)
@ -2155,13 +2127,16 @@ ip_freemoptions(struct ip_moptions *imo)
* replicating that code here.
*/
static void
ip_mloopback(struct ifnet *ifp, struct mbuf *m,
struct sockaddr_in *dst, int hlen)
ip_mloopback(ifp, m, dst, hlen)
struct ifnet *ifp;
register struct mbuf *m;
register struct sockaddr_in *dst;
int hlen;
{
struct ip *ip;
register struct ip *ip;
struct mbuf *copym;
copym = m_copypacket(m, M_DONTWAIT);
copym = m_copy(m, 0, M_COPYALL);
if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
copym = m_pullup(copym, hlen);
if (copym != NULL) {

View File

@ -163,8 +163,6 @@ extern struct pr_usrreqs rip_usrreqs;
int ip_ctloutput(struct socket *, struct sockopt *sopt);
void ip_drain(void);
int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
u_long if_hwassist_flags, int sw_csum);
void ip_freemoptions(struct ip_moptions *);
void ip_init(void);
extern int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,