A step in resolving mess with byte ordering for AF_INET. After this change:

- All packets in NETISR_IP queue are in net byte order.
  - ip_input() is entered in net byte order and converts packet
    to host byte order right _after_ processing pfil(9) hooks.
  - ip_output() is entered in host byte order and converts packet
    to net byte order right _before_ processing pfil(9) hooks.
  - ip_fragment() accepts and emits packet in net byte order.
  - ip_forward(), ip_mloopback() use host byte order (untouched actually).
  - ip_fastforward() no longer modifies packet at all (except ip_ttl).
  - Swapping of byte order there and back removed from the following modules:
    pf(4), ipfw(4), enc(4), if_bridge(4).
  - Swapping of byte order added to ipfilter(4), based on __FreeBSD_version
  - __FreeBSD_version bumped.
  - pfil(9) manual page updated.

Reviewed by:	ray, luigi, eri, melifaro
Tested by:	glebius (LE), ray (BE)
This commit is contained in:
glebius 2012-10-06 10:02:11 +00:00
parent 30f3c300d8
commit f3a0231bff
11 changed files with 80 additions and 144 deletions

View File

@ -24,6 +24,11 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 10.x IS SLOW:
disable the most expensive debugging functionality run
"ln -s 'abort:false,junk:false' /etc/malloc.conf".)
20121006:
The pfil(9) API/ABI for AF_INET family has been changed. Packet
filtering modules: pf(4), ipfw(4), ipfilter(4) need to be recompiled
with new kernel.
20121001:
The net80211(4) ABI has been changed to allow for improved driver
PS-POLL and power-save support. All wireless drivers need to be

View File

@ -28,7 +28,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd September 16, 2012
.Dd October 6, 2012
.Dt PFIL 9
.Os
.Sh NAME
@ -127,10 +127,9 @@ Currently, filtering points are implemented for the following link types:
.Pp
.Bl -tag -width "AF_INET6" -offset XXX -compact
.It AF_INET
IPv4 packets.
.It AF_INET6
IPv4 and IPv6 packets. Note that packet header is already
.Cm converted to host format.
Host format has to be preserved in case of header modifications.
IPv6 packets.
.It AF_LINK
Link-layer packets.
.El

View File

@ -2513,7 +2513,7 @@ int out;
} else
#endif
{
#if (defined(OpenBSD) && (OpenBSD >= 200311)) && defined(_KERNEL)
#if ((defined(OpenBSD) && (OpenBSD >= 200311)) || (defined(FreeBSD) && (__FreeBSD_version >= 1000019))) && defined(_KERNEL)
ip->ip_len = ntohs(ip->ip_len);
ip->ip_off = ntohs(ip->ip_off);
#endif
@ -2777,7 +2777,7 @@ int out;
RWLOCK_EXIT(&ipf_global);
#ifdef _KERNEL
# if (defined(OpenBSD) && (OpenBSD >= 200311))
# if (defined(OpenBSD) && (OpenBSD >= 200311)) || (defined(FreeBSD) && (__FreeBSD_version >= 1000019))
if (FR_ISPASS(pass) && (v == 4)) {
ip = fin->fin_ip;
ip->ip_len = ntohs(ip->ip_len);

View File

@ -3092,15 +3092,6 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
*/
switch (ether_type) {
case ETHERTYPE_IP:
/*
* before calling the firewall, swap fields the same as
* IP does. here we assume the header is contiguous
*/
ip = mtod(*mp, struct ip *);
ip->ip_len = ntohs(ip->ip_len);
ip->ip_off = ntohs(ip->ip_off);
/*
* Run pfil on the member interface and the bridge, both can
* be skipped by clearing pfil_member or pfil_bridge.
@ -3139,7 +3130,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
}
}
/* Recalculate the ip checksum and restore byte ordering */
/* Recalculate the ip checksum. */
ip = mtod(*mp, struct ip *);
hlen = ip->ip_hl << 2;
if (hlen < sizeof(struct ip))
@ -3151,8 +3142,6 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
if (ip == NULL)
goto bad;
}
ip->ip_len = htons(ip->ip_len);
ip->ip_off = htons(ip->ip_off);
ip->ip_sum = 0;
if (hlen == sizeof(struct ip))
ip->ip_sum = in_cksum_hdr(ip);

View File

@ -270,23 +270,8 @@ ipsec_filter(struct mbuf **mp, int dir, int flags)
switch (ip->ip_v) {
#ifdef INET
case 4:
/*
* before calling the firewall, swap fields the same as
* IP does. here we assume the header is contiguous
*/
ip->ip_len = ntohs(ip->ip_len);
ip->ip_off = ntohs(ip->ip_off);
error = pfil_run_hooks(&V_inet_pfil_hook, mp,
encif, dir, NULL);
if (*mp == NULL || error != 0)
break;
/* restore byte ordering */
ip = mtod(*mp, struct ip *);
ip->ip_len = htons(ip->ip_len);
ip->ip_off = htons(ip->ip_off);
break;
#endif
#ifdef INET6

View File

@ -164,7 +164,7 @@ ip_fastforward(struct mbuf *m)
struct sockaddr_in *dst = NULL;
struct ifnet *ifp;
struct in_addr odest, dest;
u_short sum, ip_len;
uint16_t sum, ip_len, ip_off;
int error = 0;
int hlen, mtu;
#ifdef IPFIREWALL_FORWARD
@ -340,12 +340,6 @@ ip_fastforward(struct mbuf *m)
* Step 3: incoming packet firewall processing
*/
/*
* Convert to host representation
*/
ip->ip_len = ntohs(ip->ip_len);
ip->ip_off = ntohs(ip->ip_off);
odest.s_addr = dest.s_addr = ip->ip_dst.s_addr;
/*
@ -472,8 +466,6 @@ ip_fastforward(struct mbuf *m)
forwardlocal:
/*
* Return packet for processing by ip_input().
* Keep host byte order as expected at ip_input's
* "ours"-label.
*/
m->m_flags |= M_FASTFWD_OURS;
if (ro.ro_rt)
@ -500,6 +492,8 @@ ip_fastforward(struct mbuf *m)
/*
* Step 6: send off the packet
*/
ip_len = ntohs(ip->ip_len);
ip_off = ntohs(ip->ip_off);
/*
* Check if route is dampned (when ARP is unable to resolve)
@ -515,7 +509,7 @@ ip_fastforward(struct mbuf *m)
/*
* Check if there is enough space in the interface queue
*/
if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
if ((ifp->if_snd.ifq_len + ip_len / ifp->if_mtu + 1) >=
ifp->if_snd.ifq_maxlen) {
IPSTAT_INC(ips_odropped);
/* would send source quench here but that is depreciated */
@ -539,13 +533,8 @@ ip_fastforward(struct mbuf *m)
else
mtu = ifp->if_mtu;
if (ip->ip_len <= mtu ||
(ifp->if_hwassist & CSUM_FRAGMENT && (ip->ip_off & IP_DF) == 0)) {
/*
* Restore packet header fields to original values
*/
ip->ip_len = htons(ip->ip_len);
ip->ip_off = htons(ip->ip_off);
if (ip_len <= mtu ||
(ifp->if_hwassist & CSUM_FRAGMENT && (ip_off & IP_DF) == 0)) {
/*
* Send off the packet via outgoing interface
*/
@ -555,7 +544,7 @@ ip_fastforward(struct mbuf *m)
/*
* Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery
*/
if (ip->ip_off & IP_DF) {
if (ip_off & IP_DF) {
IPSTAT_INC(ips_cantfrag);
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
0, mtu);
@ -565,10 +554,6 @@ ip_fastforward(struct mbuf *m)
* We have to fragment the packet
*/
m->m_pkthdr.csum_flags |= CSUM_IP;
/*
* ip_fragment expects ip_len and ip_off in host byte
* order but returns all packets in network byte order
*/
if (ip_fragment(ip, &m, mtu, ifp->if_hwassist,
(~ifp->if_hwassist & CSUM_DELAY_IP))) {
goto drop;

View File

@ -380,20 +380,18 @@ ip_input(struct mbuf *m)
struct ifaddr *ifa;
struct ifnet *ifp;
int checkif, hlen = 0;
u_short sum;
uint16_t sum, ip_len;
int dchg = 0; /* dest changed after fw */
struct in_addr odst; /* original dst address */
M_ASSERTPKTHDR(m);
if (m->m_flags & M_FASTFWD_OURS) {
/*
* Firewall or NAT changed destination to local.
* We expect ip_len and ip_off to be in host byte order.
*/
m->m_flags &= ~M_FASTFWD_OURS;
/* Set up some basics that will be used later. */
ip = mtod(m, struct ip *);
ip->ip_len = ntohs(ip->ip_len);
ip->ip_off = ntohs(ip->ip_off);
hlen = ip->ip_hl << 2;
goto ours;
}
@ -458,15 +456,11 @@ ip_input(struct mbuf *m)
return;
#endif
/*
* Convert fields to host representation.
*/
ip->ip_len = ntohs(ip->ip_len);
if (ip->ip_len < hlen) {
ip_len = ntohs(ip->ip_len);
if (ip_len < hlen) {
IPSTAT_INC(ips_badlen);
goto bad;
}
ip->ip_off = ntohs(ip->ip_off);
/*
* Check that the amount of data in the buffers
@ -474,17 +468,17 @@ ip_input(struct mbuf *m)
* Trim mbufs if longer than we expect.
* Drop packet if shorter than we expect.
*/
if (m->m_pkthdr.len < ip->ip_len) {
if (m->m_pkthdr.len < ip_len) {
tooshort:
IPSTAT_INC(ips_tooshort);
goto bad;
}
if (m->m_pkthdr.len > ip->ip_len) {
if (m->m_pkthdr.len > ip_len) {
if (m->m_len == m->m_pkthdr.len) {
m->m_len = ip->ip_len;
m->m_pkthdr.len = ip->ip_len;
m->m_len = ip_len;
m->m_pkthdr.len = ip_len;
} else
m_adj(m, ip->ip_len - m->m_pkthdr.len);
m_adj(m, ip_len - m->m_pkthdr.len);
}
#ifdef IPSEC
/*
@ -519,6 +513,8 @@ ip_input(struct mbuf *m)
#ifdef IPFIREWALL_FORWARD
if (m->m_flags & M_FASTFWD_OURS) {
m->m_flags &= ~M_FASTFWD_OURS;
ip->ip_len = ntohs(ip->ip_len);
ip->ip_off = ntohs(ip->ip_off);
goto ours;
}
if ((dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL)) != 0) {
@ -527,12 +523,21 @@ ip_input(struct mbuf *m)
* packets originally destined to us to some other directly
* connected host.
*/
ip->ip_len = ntohs(ip->ip_len);
ip->ip_off = ntohs(ip->ip_off);
ip_forward(m, dchg);
return;
}
#endif /* IPFIREWALL_FORWARD */
passin:
/*
* From now and up to output pfil(9) processing in ip_output()
* the header is in host byte order.
*/
ip->ip_len = ntohs(ip->ip_len);
ip->ip_off = ntohs(ip->ip_off);
/*
* Process options and, if not destined for us,
* ship it on. ip_dooptions returns 1 when an
@ -1360,6 +1365,8 @@ u_char inetctlerrmap[PRC_NCMDS] = {
*
* The srcrt parameter indicates whether the packet is being forwarded
* via a source route.
*
* IP header in host byte order.
*/
void
ip_forward(struct mbuf *m, int srcrt)

View File

@ -125,7 +125,8 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
int error = 0;
struct sockaddr_in *dst;
struct in_ifaddr *ia;
int isbroadcast, sw_csum;
int isbroadcast;
uint16_t ip_len, ip_off, sw_csum;
struct route iproute;
struct rtentry *rte; /* cache for ro->ro_rt */
struct in_addr odst;
@ -501,6 +502,12 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
hlen = ip->ip_hl << 2;
#endif /* IPSEC */
/*
* To network byte order. pfil(9) hooks and ip_fragment() expect this.
*/
ip->ip_len = htons(ip->ip_len);
ip->ip_off = htons(ip->ip_off);
/* Jump over all PFIL processing if hooks are not active. */
if (!PFIL_HOOKED(&V_inet_pfil_hook))
goto passout;
@ -537,6 +544,8 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
} else {
if (ia != NULL)
ifa_free(&ia->ia_ifa);
ip->ip_len = ntohs(ip->ip_len);
ip->ip_off = ntohs(ip->ip_off);
goto again; /* Redo the routing table lookup. */
}
}
@ -570,11 +579,16 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
m_tag_delete(m, fwd_tag);
if (ia != NULL)
ifa_free(&ia->ia_ifa);
ip->ip_len = ntohs(ip->ip_len);
ip->ip_off = ntohs(ip->ip_off);
goto again;
}
#endif /* IPFIREWALL_FORWARD */
passout:
ip_len = ntohs(ip->ip_len);
ip_off = ntohs(ip->ip_off);
/* 127/8 must not appear on wire - RFC1122. */
if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
(ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
@ -603,11 +617,9 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
* If small enough for interface, or the interface will take
* care of the fragmentation for us, we can just send directly.
*/
if (ip->ip_len <= mtu ||
if (ip_len <= mtu ||
(m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 ||
((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
ip->ip_len = htons(ip->ip_len);
ip->ip_off = htons(ip->ip_off);
((ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
ip->ip_sum = 0;
if (sw_csum & CSUM_DELAY_IP)
ip->ip_sum = in_cksum(m, hlen);
@ -641,7 +653,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
}
/* Balk when DF bit is set or the interface didn't support TSO. */
if ((ip->ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
if ((ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
error = EMSGSIZE;
IPSTAT_INC(ips_cantfrag);
goto bad;
@ -710,8 +722,12 @@ ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
int firstlen;
struct mbuf **mnext;
int nfrags;
uint16_t ip_len, ip_off;
if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */
ip_len = ntohs(ip->ip_len);
ip_off = ntohs(ip->ip_off);
if (ip_off & IP_DF) { /* Fragmentation not allowed */
IPSTAT_INC(ips_cantfrag);
return EMSGSIZE;
}
@ -785,7 +801,7 @@ ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
* The fragments are linked off the m_nextpkt of the original
* packet, which after processing serves as the first fragment.
*/
for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) {
for (nfrags = 1; off < ip_len; off += len, nfrags++) {
struct ip *mhip; /* ip header on the fragment */
struct mbuf *m;
int mhlen = sizeof (struct ip);
@ -811,10 +827,10 @@ ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
mhip->ip_hl = mhlen >> 2;
}
m->m_len = mhlen;
/* XXX do we need to add ip->ip_off below ? */
mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
if (off + len >= ip->ip_len) { /* last fragment */
len = ip->ip_len - off;
/* XXX do we need to add ip_off below ? */
mhip->ip_off = ((off - hlen) >> 3) + ip_off;
if (off + len >= ip_len) { /* last fragment */
len = ip_len - off;
m->m_flags |= M_LASTFRAG;
} else
mhip->ip_off |= IP_MF;
@ -849,11 +865,10 @@ ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
* Update first fragment by trimming what's been copied out
* and updating header.
*/
m_adj(m0, hlen + firstlen - ip->ip_len);
m_adj(m0, hlen + firstlen - ip_len);
m0->m_pkthdr.len = hlen + firstlen;
ip->ip_len = htons((u_short)m0->m_pkthdr.len);
ip->ip_off |= IP_MF;
ip->ip_off = htons(ip->ip_off);
ip->ip_off = htons(ip_off | IP_MF);
ip->ip_sum = 0;
if (sw_csum & CSUM_DELAY_IP)
ip->ip_sum = in_cksum(m0, hlen);
@ -1279,6 +1294,8 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
* calls the output routine of the loopback "driver", but with an interface
* pointer that might NOT be a loopback interface -- evil, but easier than
* replicating that code here.
*
* IP header in host byte order.
*/
static void
ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst,

View File

@ -125,10 +125,6 @@ ipfw_check_packet(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
int ipfw;
int ret;
/* all the processing now uses ip_len in net format */
if (mtod(*m0, struct ip *)->ip_v == 4)
SET_NET_IPLEN(mtod(*m0, struct ip *));
/* convert dir to IPFW values */
dir = (dir == PFIL_IN) ? DIR_IN : DIR_OUT;
bzero(&args, sizeof(args));
@ -288,8 +284,7 @@ ipfw_check_packet(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
FREE_PKT(*m0);
*m0 = NULL;
}
if (*m0 && mtod(*m0, struct ip *)->ip_v == 4)
SET_HOST_IPLEN(mtod(*m0, struct ip *));
return ret;
}

View File

@ -3473,23 +3473,8 @@ static int
pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
struct inpcb *inp)
{
/*
* XXX Wed Jul 9 22:03:16 2003 UTC
* OpenBSD has changed its byte ordering convention on ip_len/ip_off
* in network stack. OpenBSD's network stack have converted
* ip_len/ip_off to host byte order frist as FreeBSD.
* Now this is not true anymore , so we should convert back to network
* byte order.
*/
struct ip *h = NULL;
int chk;
if ((*m)->m_pkthdr.len >= (int)sizeof(struct ip)) {
/* if m_pkthdr.len is less than ip header, pf will handle. */
h = mtod(*m, struct ip *);
HTONS(h->ip_len);
HTONS(h->ip_off);
}
CURVNET_SET(ifp->if_vnet);
chk = pf_test(PF_IN, ifp, m, inp);
CURVNET_RESTORE();
@ -3497,28 +3482,14 @@ pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
m_freem(*m);
*m = NULL;
}
if (*m != NULL) {
/* pf_test can change ip header location */
h = mtod(*m, struct ip *);
NTOHS(h->ip_len);
NTOHS(h->ip_off);
}
return chk;
return (chk);
}
static int
pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
struct inpcb *inp)
{
/*
* XXX Wed Jul 9 22:03:16 2003 UTC
* OpenBSD has changed its byte ordering convention on ip_len/ip_off
* in network stack. OpenBSD's network stack have converted
* ip_len/ip_off to host byte order frist as FreeBSD.
* Now this is not true anymore , so we should convert back to network
* byte order.
*/
struct ip *h = NULL;
int chk;
/* We need a proper CSUM befor we start (s. OpenBSD ip_output) */
@ -3526,12 +3497,7 @@ pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
in_delayed_cksum(*m);
(*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
}
if ((*m)->m_pkthdr.len >= (int)sizeof(*h)) {
/* if m_pkthdr.len is less than ip header, pf will handle. */
h = mtod(*m, struct ip *);
HTONS(h->ip_len);
HTONS(h->ip_off);
}
CURVNET_SET(ifp->if_vnet);
chk = pf_test(PF_OUT, ifp, m, inp);
CURVNET_RESTORE();
@ -3539,13 +3505,8 @@ pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
m_freem(*m);
*m = NULL;
}
if (*m != NULL) {
/* pf_test can change ip header location */
h = mtod(*m, struct ip *);
NTOHS(h->ip_len);
NTOHS(h->ip_off);
}
return chk;
return (chk);
}
#endif
@ -3554,10 +3515,6 @@ static int
pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
struct inpcb *inp)
{
/*
* IPv6 is not affected by ip_len/ip_off byte order changes.
*/
int chk;
/*
@ -3579,9 +3536,6 @@ static int
pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
struct inpcb *inp)
{
/*
* IPv6 does not affected ip_len/ip_off byte order changes.
*/
int chk;
/* We need a proper CSUM before we start (s. OpenBSD ip_output) */

View File

@ -58,7 +58,7 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
#define __FreeBSD_version 1000018 /* Master, propagated to newvers */
#define __FreeBSD_version 1000019 /* Master, propagated to newvers */
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,