From b375c9ec2ca23ffdb4e664d705b9664a39f82d06 Mon Sep 17 00:00:00 2001 From: Luigi Rizzo <luigi@FreeBSD.org> Date: Wed, 20 Nov 2002 18:56:25 +0000 Subject: [PATCH] Back out the ip_fragment() code -- it is not urgent to have it in now, I will put it back in in a better form after 5.0 is out. Requested by: sam, rwatson, luigi (on second thought) Approved by: re --- sys/netinet/ip_output.c | 403 +++++++++++++++++++--------------------- sys/netinet/ip_var.h | 2 - 2 files changed, 189 insertions(+), 216 deletions(-) diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 1ed5459e02de..4c190900e470 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -116,10 +116,15 @@ extern struct protosw inetsw[]; * The mbuf opt, if present, will not be freed. */ int -ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, - int flags, struct ip_moptions *imo, struct inpcb *inp) +ip_output(m0, opt, ro, flags, imo, inp) + struct mbuf *m0; + struct mbuf *opt; + struct route *ro; + int flags; + struct ip_moptions *imo; + struct inpcb *inp; { - struct ip *ip; + struct ip *ip, *mhip; struct ifnet *ifp = NULL; /* keep compiler happy */ struct mbuf *m; int hlen = sizeof (struct ip); @@ -453,7 +458,7 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, goto bad; } /* don't allow broadcast messages to be fragmented */ - if (ip->ip_len > ifp->if_mtu) { + if ((u_short)ip->ip_len > ifp->if_mtu) { error = EMSGSIZE; goto bad; } @@ -980,7 +985,8 @@ pass: * If small enough for interface, or the interface will take * care of the fragmentation for us, can just send directly. */ - if (ip->ip_len <= ifp->if_mtu || ifp->if_hwassist & CSUM_FRAGMENT) { + if ((u_short)ip->ip_len <= ifp->if_mtu || + ifp->if_hwassist & CSUM_FRAGMENT) { ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); ip->ip_sum = 0; @@ -1002,6 +1008,10 @@ pass: (struct sockaddr *)dst, ro->ro_rt); goto done; } + /* + * Too large for interface; fragment if possible. + * Must be able to put at least 8 bytes per fragment. + */ if (ip->ip_off & IP_DF) { error = EMSGSIZE; /* @@ -1019,16 +1029,143 @@ pass: ipstat.ips_cantfrag++; goto bad; } - /* - * Too large for interface; fragment if possible. If successful, - * on return m will point to a list of packets to be sent. - */ - error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum); - if (error) + len = (ifp->if_mtu - hlen) &~ 7; + if (len < 8) { + error = EMSGSIZE; goto bad; - for (; m; m = m0) { + } + + /* + * if the interface will not calculate checksums on + * fragmented packets, then do it here. + */ + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA && + (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) { + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } + + if (len > PAGE_SIZE) { + /* + * Fragement large datagrams such that each segment + * contains a multiple of PAGE_SIZE amount of data, + * plus headers. This enables a receiver to perform + * page-flipping zero-copy optimizations. + */ + + int newlen; + struct mbuf *mtmp; + + for (mtmp = m, off = 0; + mtmp && ((off + mtmp->m_len) <= ifp->if_mtu); + mtmp = mtmp->m_next) { + off += mtmp->m_len; + } + /* + * firstlen (off - hlen) must be aligned on an + * 8-byte boundary + */ + if (off < hlen) + goto smart_frag_failure; + off = ((off - hlen) & ~7) + hlen; + newlen = (~PAGE_MASK) & ifp->if_mtu; + if ((newlen + sizeof (struct ip)) > ifp->if_mtu) { + /* we failed, go back the default */ +smart_frag_failure: + newlen = len; + off = hlen + len; + } + +/* printf("ipfrag: len = %d, hlen = %d, mhlen = %d, newlen = %d, off = %d\n", + len, hlen, sizeof (struct ip), newlen, off);*/ + + len = newlen; + + } else { + off = hlen + len; + } + + + + { + int mhlen, firstlen = off - hlen; + struct mbuf **mnext = &m->m_nextpkt; + int nfrags = 1; + + /* + * Loop through length of segment after first fragment, + * make new header and copy data of each part and link onto chain. + */ + m0 = m; + mhlen = sizeof (struct ip); + for (; off < (u_short)ip->ip_len; off += len) { + MGETHDR(m, M_DONTWAIT, MT_HEADER); + if (m == 0) { + error = ENOBUFS; + ipstat.ips_odropped++; + goto sendorfree; + } + m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; + m->m_data += max_linkhdr; + mhip = mtod(m, struct ip *); + *mhip = *ip; + if (hlen > sizeof (struct ip)) { + mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); + mhip->ip_v = IPVERSION; + mhip->ip_hl = mhlen >> 2; + } + m->m_len = mhlen; + mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; + if (off + len >= (u_short)ip->ip_len) + len = (u_short)ip->ip_len - off; + else + mhip->ip_off |= IP_MF; + mhip->ip_len = htons((u_short)(len + mhlen)); + m->m_next = m_copy(m0, off, len); + if (m->m_next == 0) { + (void) m_free(m); + error = ENOBUFS; /* ??? */ + ipstat.ips_odropped++; + goto sendorfree; + } + m->m_pkthdr.len = mhlen + len; + m->m_pkthdr.rcvif = (struct ifnet *)0; +#ifdef MAC + mac_create_fragment(m0, m); +#endif + m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; + mhip->ip_off = htons(mhip->ip_off); + mhip->ip_sum = 0; + if (sw_csum & CSUM_DELAY_IP) + mhip->ip_sum = in_cksum(m, mhlen); + *mnext = m; + mnext = &m->m_nextpkt; + nfrags++; + } + ipstat.ips_ofragments += nfrags; + + /* set first/last markers for fragment chain */ + m->m_flags |= M_LASTFRAG; + m0->m_flags |= M_FIRSTFRAG | M_FRAG; + m0->m_pkthdr.csum_data = nfrags; + + /* + * Update first fragment by trimming what's been copied out + * and updating header, then send each fragment (in order). + */ + m = m0; + m_adj(m, hlen + firstlen - (u_short)ip->ip_len); + m->m_pkthdr.len = hlen + firstlen; + ip->ip_len = htons((u_short)m->m_pkthdr.len); + ip->ip_off |= IP_MF; + ip->ip_off = htons(ip->ip_off); + ip->ip_sum = 0; + if (sw_csum & CSUM_DELAY_IP) + ip->ip_sum = in_cksum(m, hlen); +sendorfree: + for (m = m0; m; m = m0) { m0 = m->m_nextpkt; - m->m_nextpkt = NULL; + m->m_nextpkt = 0; #ifdef IPSEC /* clean ipsec history once it goes out of the node */ ipsec_delaux(m); @@ -1048,7 +1185,7 @@ pass: if (error == 0) ipstat.ips_fragmented++; - + } done: #ifdef IPSEC if (ro == &iproute && ro->ro_rt) { @@ -1075,187 +1212,6 @@ bad: goto done; } -/* - * Create a chain of fragments which fit the given mtu. m_frag points to the - * mbuf to be fragmented; on return it points to the chain with the fragments. - * Return 0 if no error. - * - * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) - * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP). - */ -int -ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, - u_long if_hwassist_flags, int sw_csum) -{ - int error = 0; - int hlen = ip->ip_hl << 2; - int len = (mtu - hlen) &~ 7; /* size of payload in each fragment */ - int off; - struct mbuf *m0 = *m_frag; /* the original packet */ - int firstlen; - struct mbuf **mnext; - int nfrags; - - if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */ - ipstat.ips_cantfrag++; - return EMSGSIZE; - } - - /* - * Must be able to put at least 8 bytes per fragment. - */ - if (len < 8) - return EMSGSIZE; - - /* - * if the interface will not calculate checksums on - * fragmented packets, then do it here. - */ - if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA && - (if_hwassist_flags & CSUM_IP_FRAGS) == 0) { - in_delayed_cksum(m0); - m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - } - - if (len > PAGE_SIZE) { - /* - * Fragment large datagrams such that each segment - * contains a multiple of PAGE_SIZE amount of data, - * plus headers. This enables a receiver to perform - * page-flipping zero-copy optimizations. - * - * XXX When does this help given that sender and receiver - * could have different page sizes, and also mtu could - * be less than the receiver's page size ? - */ - - int newlen; - struct mbuf *m; - - for (m = m0, off = 0; m && ((off + m->m_len) <= mtu); - m = m->m_next) - off += m->m_len; - /* - * firstlen (off - hlen) must be aligned on an - * 8-byte boundary - */ - if (off < hlen) - goto smart_frag_failure; - off = ((off - hlen) & ~7) + hlen; - newlen = (~PAGE_MASK) & mtu; - if ((newlen + sizeof (struct ip)) > mtu) { - /* we failed, go back the default */ -smart_frag_failure: - newlen = len; - off = hlen + len; - } - len = newlen; - - } else { - off = hlen + len; - } - - firstlen = off - hlen; - mnext = &m0->m_nextpkt; /* pointer to next packet */ - - /* - * Loop through length of segment after first fragment, - * make new header and copy data of each part and link onto chain. - * Here, m0 is the original packet, m is the fragment being created. - * The fragments are linked off the m_nextpkt of the original - * packet, which after processing serves as the first fragment. - */ - for (nfrags=1; off < ip->ip_len; off += len, nfrags++) { - struct ip *mhip; /* ip header on the fragment */ - struct mbuf *m; - int mhlen = sizeof (struct ip); - - MGETHDR(m, M_DONTWAIT, MT_HEADER); - if (m == 0) { - error = ENOBUFS; - ipstat.ips_odropped++; - goto done; - } - m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; - /* - * In the first mbuf, leave room for the link header, then - * copy the original IP header including options. The payload - * goes into an additional mbuf chain returned by m_copy(). - */ - m->m_data += max_linkhdr; - mhip = mtod(m, struct ip *); - *mhip = *ip; - if (hlen > sizeof (struct ip)) { - mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); - mhip->ip_v = IPVERSION; - mhip->ip_hl = mhlen >> 2; - } - m->m_len = mhlen; - /* XXX do we need to add ip->ip_off below ? */ - mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; - if (off + len >= ip->ip_len) { /* last fragment */ - len = ip->ip_len - off; - m->m_flags |= M_LASTFRAG; - } else - mhip->ip_off |= IP_MF; - mhip->ip_len = htons((u_short)(len + mhlen)); - m->m_next = m_copy(m0, off, len); - if (m->m_next == 0) { /* copy failed */ - m_free(m); - error = ENOBUFS; /* ??? */ - ipstat.ips_odropped++; - goto done; - } - m->m_pkthdr.len = mhlen + len; - m->m_pkthdr.rcvif = (struct ifnet *)0; -#ifdef MAC - mac_create_fragment(m0, m); -#endif - m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; - mhip->ip_off = htons(mhip->ip_off); - mhip->ip_sum = 0; - if (sw_csum & CSUM_DELAY_IP) - mhip->ip_sum = in_cksum(m, mhlen); - *mnext = m; - mnext = &m->m_nextpkt; - } - ipstat.ips_ofragments += nfrags; - - /* set first markers for fragment chain */ - m0->m_flags |= M_FIRSTFRAG | M_FRAG; - m0->m_pkthdr.csum_data = nfrags; - - /* - * Update first fragment by trimming what has been copied out - * and updating header. - */ - m_adj(m0, hlen + firstlen - ip->ip_len); - m0->m_pkthdr.len = hlen + firstlen; - ip->ip_len = htons((u_short)m0->m_pkthdr.len); - ip->ip_off |= IP_MF; - ip->ip_off = htons(ip->ip_off); - ip->ip_sum = 0; - if (sw_csum & CSUM_DELAY_IP) - ip->ip_sum = in_cksum(m0, hlen); - *m_frag = m0; - -done: - if (error) { - struct mbuf *m; - - for (m = m0; m; m = m0) { - m0 = m->m_nextpkt; - m->m_nextpkt = 0; -#ifdef IPSEC - /* clean ipsec history */ - ipsec_delaux(m); -#endif - m_freem(m); - } - } - return error; -} - void in_delayed_cksum(struct mbuf *m) { @@ -1291,15 +1247,18 @@ in_delayed_cksum(struct mbuf *m) * XXX This routine assumes that the packet has no options in place. */ static struct mbuf * -ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen) +ip_insertoptions(m, opt, phlen) + register struct mbuf *m; + struct mbuf *opt; + int *phlen; { - struct ipoption *p = mtod(opt, struct ipoption *); + register struct ipoption *p = mtod(opt, struct ipoption *); struct mbuf *n; - struct ip *ip = mtod(m, struct ip *); + register struct ip *ip = mtod(m, struct ip *); unsigned optlen; optlen = opt->m_len - sizeof(p->ipopt_dst); - if (optlen + ip->ip_len > IP_MAXPACKET) { + if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) { *phlen = 0; return (m); /* XXX should fail */ } @@ -1343,9 +1302,10 @@ ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen) * omitting those not copied during fragmentation. */ int -ip_optcopy(struct ip *ip, struct ip *jp) +ip_optcopy(ip, jp) + struct ip *ip, *jp; { - u_char *cp, *dp; + register u_char *cp, *dp; int opt, optlen, cnt; cp = (u_char *)(ip + 1); @@ -1385,7 +1345,9 @@ ip_optcopy(struct ip *ip, struct ip *jp) * IP socket option processing. */ int -ip_ctloutput(struct socket *so, struct sockopt *sopt) +ip_ctloutput(so, sopt) + struct socket *so; + struct sockopt *sopt; { struct inpcb *inp = sotoinpcb(so); int error, optval; @@ -1646,10 +1608,13 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) * with destination address if source routed. */ static int -ip_pcbopts(int optname, struct mbuf **pcbopt, struct mbuf *m) +ip_pcbopts(optname, pcbopt, m) + int optname; + struct mbuf **pcbopt; + register struct mbuf *m; { - int cnt, optlen; - u_char *cp; + register int cnt, optlen; + register u_char *cp; u_char opt; /* turn off any old options */ @@ -1752,7 +1717,9 @@ bad: * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. */ static struct ifnet * -ip_multicast_if(struct in_addr *a, int *ifindexp) +ip_multicast_if(a, ifindexp) + struct in_addr *a; + int *ifindexp; { int ifindex; struct ifnet *ifp; @@ -1776,7 +1743,9 @@ ip_multicast_if(struct in_addr *a, int *ifindexp) * Set the IP multicast options in response to user setsockopt(). */ static int -ip_setmoptions(struct sockopt *sopt, struct ip_moptions **imop) +ip_setmoptions(sopt, imop) + struct sockopt *sopt; + struct ip_moptions **imop; { int error = 0; int i; @@ -2072,7 +2041,9 @@ ip_setmoptions(struct sockopt *sopt, struct ip_moptions **imop) * Return the IP multicast options in response to user getsockopt(). */ static int -ip_getmoptions(struct sockopt *sopt, struct ip_moptions *imo) +ip_getmoptions(sopt, imo) + struct sockopt *sopt; + register struct ip_moptions *imo; { struct in_addr addr; struct in_ifaddr *ia; @@ -2136,9 +2107,10 @@ ip_getmoptions(struct sockopt *sopt, struct ip_moptions *imo) * Discard the IP multicast options. */ void -ip_freemoptions(struct ip_moptions *imo) +ip_freemoptions(imo) + register struct ip_moptions *imo; { - int i; + register int i; if (imo != NULL) { for (i = 0; i < imo->imo_num_memberships; ++i) @@ -2155,13 +2127,16 @@ ip_freemoptions(struct ip_moptions *imo) * replicating that code here. */ static void -ip_mloopback(struct ifnet *ifp, struct mbuf *m, - struct sockaddr_in *dst, int hlen) +ip_mloopback(ifp, m, dst, hlen) + struct ifnet *ifp; + register struct mbuf *m; + register struct sockaddr_in *dst; + int hlen; { - struct ip *ip; + register struct ip *ip; struct mbuf *copym; - copym = m_copypacket(m, M_DONTWAIT); + copym = m_copy(m, 0, M_COPYALL); if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) copym = m_pullup(copym, hlen); if (copym != NULL) { diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 642580ff008d..c03c44658252 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -163,8 +163,6 @@ extern struct pr_usrreqs rip_usrreqs; int ip_ctloutput(struct socket *, struct sockopt *sopt); void ip_drain(void); -int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, - u_long if_hwassist_flags, int sw_csum); void ip_freemoptions(struct ip_moptions *); void ip_init(void); extern int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,