diff --git a/contrib/ipfilter/tools/ipftest.c b/contrib/ipfilter/tools/ipftest.c index a475828b2e8f..378523d3bdf3 100644 --- a/contrib/ipfilter/tools/ipftest.c +++ b/contrib/ipfilter/tools/ipftest.c @@ -864,3 +864,11 @@ void fixv4sums(m, ip) *(u_short *)csump = fr_cksum(&tmp, ip, p, hdr); } } + +void +ip_fillid(struct ip *ip) +{ + static uint16_t ip_id; + + ip->ip_id = ip_id++; +} diff --git a/share/man/man4/inet.4 b/share/man/man4/inet.4 index b0b5a9ee30e7..3a2bf8a394d6 100644 --- a/share/man/man4/inet.4 +++ b/share/man/man4/inet.4 @@ -28,7 +28,7 @@ .\" From: @(#)inet.4 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd January 26, 2012 +.Dd April 2, 2015 .Dt INET 4 .Os .Sh NAME @@ -226,12 +226,24 @@ This .Xr sysctl 8 variable affects packets destined for a local host as well as packets forwarded to some other host. +.It Va ip.rfc6864 +Boolean: control IP IDs generation behaviour. +True value enables RFC6864 support, which specifies that IP ID field of +.Em atomic +datagrams can be set to any value. +The +.Fx implementation sets it to zero. +Enabled by default. .It Va ip.random_id Boolean: control IP IDs generation behaviour. Setting this .Xr sysctl 8 -to non-zero causes the ID field in IP packets to be randomized instead of -incremented by 1 with each packet generated. +to 1 causes the ID field in +.Em non-atomic +IP datagrams (or all IP datagrams, if +.Va ip.rfc6864 +is disabled) to be randomized instead of incremented by 1 with each packet +generated. This closes a minor information leak which allows remote observers to determine the rate of packet generation on the machine by watching the counter. diff --git a/sys/contrib/ipfilter/netinet/fil.c b/sys/contrib/ipfilter/netinet/fil.c index 55a2a4d76d49..58d902833012 100644 --- a/sys/contrib/ipfilter/netinet/fil.c +++ b/sys/contrib/ipfilter/netinet/fil.c @@ -6086,23 +6086,24 @@ ipf_updateipid(fin) u_32_t sumd, sum; ip_t *ip; + ip = fin->fin_ip; + ido = ntohs(ip->ip_id); if (fin->fin_off != 0) { sum = ipf_frag_ipidknown(fin); if (sum == 0xffffffff) return -1; sum &= 0xffff; id = (u_short)sum; + ip->ip_id = htons(id); } else { - id = ipf_nextipid(fin); - if (fin->fin_off == 0 && (fin->fin_flx & FI_FRAG) != 0) + ip_fillid(ip); + id = ntohs(ip->ip_id); + if ((fin->fin_flx & FI_FRAG) != 0) (void) ipf_frag_ipidnew(fin, (u_32_t)id); } - ip = fin->fin_ip; - ido = ntohs(ip->ip_id); if (id == ido) return 0; - ip->ip_id = htons(id); CALC_SUMD(ido, id, sumd); /* DESTRUCTIVE MACRO! id,ido change */ sum = (~ntohs(ip->ip_sum)) & 0xffff; sum += sumd; diff --git a/sys/contrib/ipfilter/netinet/ip_fil.h b/sys/contrib/ipfilter/netinet/ip_fil.h index 11e8b9bf8459..0ae18e13d9ca 100644 --- a/sys/contrib/ipfilter/netinet/ip_fil.h +++ b/sys/contrib/ipfilter/netinet/ip_fil.h @@ -1718,6 +1718,7 @@ extern int ipfioctl __P((ipf_main_softc_t *, int, ioctlcmd_t, extern void m_freem __P((mb_t *)); extern size_t msgdsize __P((mb_t *)); extern int bcopywrap __P((void *, void *, size_t)); +extern void ip_fillid(struct ip *); #else /* #ifndef _KERNEL */ # if defined(__NetBSD__) && defined(PFIL_HOOKS) extern void ipfilterattach __P((int)); @@ -1932,7 +1933,6 @@ extern int ipf_matchtag __P((ipftag_t *, ipftag_t *)); extern int ipf_matchicmpqueryreply __P((int, icmpinfo_t *, struct icmp *, int)); extern u_32_t ipf_newisn __P((fr_info_t *)); -extern u_short ipf_nextipid __P((fr_info_t *)); extern u_int ipf_pcksum __P((fr_info_t *, int, u_int)); extern void ipf_rule_expire __P((ipf_main_softc_t *)); extern int ipf_scanlist __P((fr_info_t *, u_32_t)); diff --git a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c index 551556186127..3bb46cf64ea9 100644 --- a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c +++ b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c @@ -97,7 +97,6 @@ MALLOC_DEFINE(M_IPFILTER, "ipfilter", "IP Filter packet filter data structures") # endif -static u_short ipid = 0; static int (*ipf_savep) __P((void *, ip_t *, int, void *, int, struct mbuf **)); static int ipf_send_ip __P((fr_info_t *, mb_t *)); static void ipf_timer_func __P((void *arg)); @@ -231,8 +230,6 @@ ipfattach(softc) if (softc->ipf_control_forwarding & 1) V_ipforwarding = 1; - ipid = 0; - SPL_X(s); #if 0 softc->ipf_slow_ch = timeout(ipf_timer_func, softc, @@ -1074,31 +1071,6 @@ ipf_newisn(fin) } -/* ------------------------------------------------------------------------ */ -/* Function: ipf_nextipid */ -/* Returns: int - 0 == success, -1 == error (packet should be droppped) */ -/* Parameters: fin(I) - pointer to packet information */ -/* */ -/* Returns the next IPv4 ID to use for this packet. */ -/* ------------------------------------------------------------------------ */ -u_short -ipf_nextipid(fin) - fr_info_t *fin; -{ - u_short id; - -#ifndef RANDOM_IP_ID - MUTEX_ENTER(&ipfmain.ipf_rw); - id = ipid++; - MUTEX_EXIT(&ipfmain.ipf_rw); -#else - id = ip_randomid(); -#endif - - return id; -} - - INLINE int ipf_checkv4sum(fin) fr_info_t *fin; diff --git a/sys/contrib/ipfilter/netinet/ip_nat.c b/sys/contrib/ipfilter/netinet/ip_nat.c index 6c93810c215a..36b4fe3bd2d7 100644 --- a/sys/contrib/ipfilter/netinet/ip_nat.c +++ b/sys/contrib/ipfilter/netinet/ip_nat.c @@ -5221,7 +5221,7 @@ ipf_nat_out(fin, nat, natadd, nflags) } ip = MTOD(m, ip_t *); - ip->ip_id = htons(ipf_nextipid(fin)); + ip_fillid(ip); s2 = ntohs(ip->ip_id); s1 = ip->ip_len; @@ -5666,7 +5666,7 @@ ipf_nat_in(fin, nat, natadd, nflags) } ip = MTOD(m, ip_t *); - ip->ip_id = htons(ipf_nextipid(fin)); + ip_fillid(ip); sum1 = ntohs(ip->ip_len); ip->ip_len = ntohs(ip->ip_len); ip->ip_len += fin->fin_plen; diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index 524ebd8443bb..20962b03d83c 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -838,11 +838,11 @@ carp_send_ad_locked(struct carp_softc *sc) ip->ip_hl = sizeof(*ip) >> 2; ip->ip_tos = IPTOS_LOWDELAY; ip->ip_len = htons(len); - ip->ip_id = ip_newid(); ip->ip_off = htons(IP_DF); ip->ip_ttl = CARP_DFLTTL; ip->ip_p = IPPROTO_CARP; ip->ip_sum = 0; + ip_fillid(ip); bzero(&sa, sizeof(sa)); sa.sa_family = AF_INET; diff --git a/sys/netinet/ip_gre.c b/sys/netinet/ip_gre.c index 59423644c73a..fc64e69edad4 100644 --- a/sys/netinet/ip_gre.c +++ b/sys/netinet/ip_gre.c @@ -145,7 +145,7 @@ in_gre_output(struct mbuf *m, int af, int hlen) #ifdef INET6 case AF_INET6: gi->gi_ip.ip_tos = 0; /* XXX */ - gi->gi_ip.ip_id = ip_newid(); + ip_fillid(&gi->gi_ip); break; #endif } diff --git a/sys/netinet/ip_id.c b/sys/netinet/ip_id.c index 039c62f13742..4f29ae27ea6e 100644 --- a/sys/netinet/ip_id.c +++ b/sys/netinet/ip_id.c @@ -74,26 +74,37 @@ __FBSDID("$FreeBSD$"); * enabled. */ -#include -#include #include -#include -#include -#include +#include +#include +#include #include #include #include -#include +#include #include #include #include #include +#include #include -static MALLOC_DEFINE(M_IPID, "ipid", "randomized ip id state"); +/* + * By default we generate IP ID only for non-atomic datagrams, as + * suggested by RFC6864. We use per-CPU counter for that, or if + * user wants to, we can turn on random ID generation. + */ +static VNET_DEFINE(int, ip_rfc6864) = 1; +static VNET_DEFINE(int, ip_do_randomid) = 0; +#define V_ip_rfc6864 VNET(ip_rfc6864) +#define V_ip_do_randomid VNET(ip_do_randomid) +/* + * Random ID state engine. + */ +static MALLOC_DEFINE(M_IPID, "ipid", "randomized ip id state"); static VNET_DEFINE(uint16_t *, id_array); static VNET_DEFINE(bitstr_t *, id_bits); static VNET_DEFINE(int, array_ptr); @@ -109,12 +120,27 @@ static VNET_DEFINE(struct mtx, ip_id_mtx); #define V_random_id_total VNET(random_id_total) #define V_ip_id_mtx VNET(ip_id_mtx) -static void ip_initid(int); +/* + * Non-random ID state engine is simply a per-cpu counter. + */ +static VNET_DEFINE(counter_u64_t, ip_id); +#define V_ip_id VNET(ip_id) + +static int sysctl_ip_randomid(SYSCTL_HANDLER_ARGS); static int sysctl_ip_id_change(SYSCTL_HANDLER_ARGS); +static void ip_initid(int); +static uint16_t ip_randomid(void); static void ipid_sysinit(void); static void ipid_sysuninit(void); SYSCTL_DECL(_net_inet_ip); +SYSCTL_PROC(_net_inet_ip, OID_AUTO, random_id, + CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(ip_do_randomid), 0, sysctl_ip_randomid, "IU", + "Assign random ip_id values"); +SYSCTL_INT(_net_inet_ip, OID_AUTO, rfc6864, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(ip_rfc6864), 0, + "Use constant IP ID for atomic datagrams"); SYSCTL_PROC(_net_inet_ip, OID_AUTO, random_id_period, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(array_size), 0, sysctl_ip_id_change, "IU", "IP ID Array size"); @@ -124,6 +150,26 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_collisions, SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_total, CTLFLAG_RD | CTLFLAG_VNET, &VNET_NAME(random_id_total), 0, "Count of IP IDs created"); +static int +sysctl_ip_randomid(SYSCTL_HANDLER_ARGS) +{ + int error, new; + + new = V_ip_do_randomid; + error = sysctl_handle_int(oidp, &new, 0, req); + if (error || req->newptr == NULL) + return (error); + if (new != 0 && new != 1) + return (EINVAL); + if (new == V_ip_do_randomid) + return (0); + if (new == 1 && V_ip_do_randomid == 0) + ip_initid(8192); + /* We don't free memory when turning random ID off, due to race. */ + V_ip_do_randomid = new; + return (0); +} + static int sysctl_ip_id_change(SYSCTL_HANDLER_ARGS) { @@ -164,7 +210,7 @@ ip_initid(int new_size) mtx_unlock(&V_ip_id_mtx); } -uint16_t +static uint16_t ip_randomid(void) { uint16_t new_id; @@ -191,12 +237,34 @@ ip_randomid(void) return (new_id); } +void +ip_fillid(struct ip *ip) +{ + + /* + * Per RFC6864 Section 4 + * + * o Atomic datagrams: (DF==1) && (MF==0) && (frag_offset==0) + * o Non-atomic datagrams: (DF==0) || (MF==1) || (frag_offset>0) + */ + if (V_ip_rfc6864 && (ip->ip_off & htons(IP_DF)) == htons(IP_DF)) + ip->ip_id = 0; + else if (V_ip_do_randomid) + ip->ip_id = ip_randomid(); + else { + counter_u64_add(V_ip_id, 1); + ip->ip_id = htons((*(uint64_t *)zpcpu_get(V_ip_id)) & 0xffff); + } +} + static void ipid_sysinit(void) { mtx_init(&V_ip_id_mtx, "ip_id_mtx", NULL, MTX_DEF); - ip_initid(8192); + V_ip_id = counter_u64_alloc(M_WAITOK); + for (int i = 0; i < mp_ncpus; i++) + arc4rand(zpcpu_get_cpu(V_ip_id, i), sizeof(uint64_t), 0); } VNET_SYSINIT(ip_id, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipid_sysinit, NULL); @@ -205,7 +273,10 @@ ipid_sysuninit(void) { mtx_destroy(&V_ip_id_mtx); - free(V_id_array, M_IPID); - free(V_id_bits, M_IPID); + if (V_id_array != NULL) { + free(V_id_array, M_IPID); + free(V_id_bits, M_IPID); + } + counter_u64_free(V_ip_id); } VNET_SYSUNINIT(ip_id, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipid_sysuninit, NULL); diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 9ca6f700099c..a3dd57f5c0bb 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -105,11 +105,6 @@ SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_R &VNET_NAME(ipsendredirects), 0, "Enable sending IP redirects"); -VNET_DEFINE(int, ip_do_randomid); -SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_VNET | CTLFLAG_RW, - &VNET_NAME(ip_do_randomid), 0, - "Assign random ip_id values"); - /* * XXX - Setting ip_checkinterface mostly implements the receive side of * the Strong ES model described in RFC 1122, but since the routing table @@ -331,8 +326,6 @@ ip_init(void) struct protosw *pr; int i; - V_ip_id = time_second & 0xffff; - TAILQ_INIT(&V_in_ifaddrhead); V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask); diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c index 3015148eadf1..a71e91c42b26 100644 --- a/sys/netinet/ip_mroute.c +++ b/sys/netinet/ip_mroute.c @@ -2501,7 +2501,6 @@ pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy, */ ip_outer = mtod(mb_first, struct ip *); *ip_outer = pim_encap_iphdr; - ip_outer->ip_id = ip_newid(); ip_outer->ip_len = htons(len + sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr)); ip_outer->ip_src = V_viftable[vifi].v_lcl_addr; @@ -2513,6 +2512,7 @@ pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy, ip_outer->ip_tos = ip->ip_tos; if (ip->ip_off & htons(IP_DF)) ip_outer->ip_off |= htons(IP_DF); + ip_fillid(ip_outer); pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer + sizeof(pim_encap_iphdr)); *pimhdr = pim_encap_pimhdr; diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index a8af410be9ea..b948bef22b41 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -91,8 +91,6 @@ __FBSDID("$FreeBSD$"); #include -VNET_DEFINE(uint32_t, ip_id); - #ifdef MBUF_STRESS_TEST static int mbuf_frag_size = 0; SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, @@ -174,21 +172,10 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, ip_len = ntohs(ip->ip_len); ip_off = ntohs(ip->ip_off); - /* - * Fill in IP header. If we are not allowing fragmentation, - * then the ip_id field is meaningless, but we don't set it - * to zero. Doing so causes various problems when devices along - * the path (routers, load balancers, firewalls, etc.) illegally - * disable DF on our packet. Note that a 16-bit counter - * will wrap around in less than 10 seconds at 100 Mbit/s on a - * medium with MTU 1500. See Steven M. Bellovin, "A Technique - * for Counting NATted Hosts", Proc. IMW'02, available at - * . - */ if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { ip->ip_v = IPVERSION; ip->ip_hl = hlen >> 2; - ip->ip_id = ip_newid(); + ip_fillid(ip); IPSTAT_INC(ips_localout); } else { /* Header already set, fetch hlen from there */ diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 00d7a970a089..239bc7342f80 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -174,7 +174,6 @@ struct inpcb; struct route; struct sockopt; -VNET_DECLARE(uint32_t, ip_id); /* ip packet ctr, for ids */ VNET_DECLARE(int, ip_defttl); /* default IP ttl */ VNET_DECLARE(int, ipforwarding); /* ip forwarding */ #ifdef IPSTEALTH @@ -228,7 +227,7 @@ struct in_ifaddr * void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *, struct mbuf *); void ip_slowtimo(void); -uint16_t ip_randomid(void); +void ip_fillid(struct ip *); int rip_ctloutput(struct socket *, struct sockopt *); void rip_ctlinput(int, struct sockaddr *, void *); void rip_init(void); @@ -302,22 +301,6 @@ extern int (*ng_ipfw_input_p)(struct mbuf **, int, extern int (*ip_dn_ctl_ptr)(struct sockopt *); extern int (*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *); - -VNET_DECLARE(int, ip_do_randomid); -#define V_ip_do_randomid VNET(ip_do_randomid) -static __inline uint16_t -ip_newid(void) -{ - uint16_t res; - - if (V_ip_do_randomid != 0) - return (ip_randomid()); - else { - res = atomic_fetchadd_32(&V_ip_id, 1) & 0xFFFF; - return (htons(res)); - } -} - #endif /* _KERNEL */ #endif /* !_NETINET_IP_VAR_H_ */ diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index cea08e361073..a71d8ec86f71 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -505,8 +505,12 @@ rip_output(struct mbuf *m, struct socket *so, ...) m_freem(m); return (EINVAL); } + /* + * This doesn't allow application to specify ID of zero, + * but we got this limitation from the beginning of history. + */ if (ip->ip_id == 0) - ip->ip_id = ip_newid(); + ip_fillid(ip); /* * XXX prevent ip_output from overwriting header fields. diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c index cb59dab5359d..ed3b9ec6818d 100644 --- a/sys/netinet/sctp_output.c +++ b/sys/netinet/sctp_output.c @@ -4106,7 +4106,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, ip->ip_off = htons(0); } /* FreeBSD has a function for ip_id's */ - ip->ip_id = ip_newid(); + ip_fillid(ip); ip->ip_ttl = inp->ip_inp.inp.inp_ip_ttl; ip->ip_len = htons(packet_length); @@ -10949,8 +10949,8 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst, ip->ip_v = IPVERSION; ip->ip_hl = (sizeof(struct ip) >> 2); ip->ip_tos = 0; - ip->ip_id = ip_newid(); ip->ip_off = 0; + ip_fillid(ip); ip->ip_ttl = MODULE_GLOBAL(ip_defttl); if (port) { ip->ip_p = IPPROTO_UDP; diff --git a/sys/netipsec/xform_ipip.c b/sys/netipsec/xform_ipip.c index 5167b9661b9c..fc6c4a132633 100644 --- a/sys/netipsec/xform_ipip.c +++ b/sys/netipsec/xform_ipip.c @@ -136,9 +136,6 @@ ipip_output(struct mbuf *m, struct ipsecrequest *isr, struct mbuf **mp, ipo->ip_sum = 0; ipo->ip_src = saidx->src.sin.sin_addr; ipo->ip_dst = saidx->dst.sin.sin_addr; - - ipo->ip_id = ip_newid(); - /* If the inner protocol is IP... */ switch (tp) { case IPVERSION: @@ -178,6 +175,7 @@ ipip_output(struct mbuf *m, struct ipsecrequest *isr, struct mbuf **mp, default: goto nofamily; } + ip_fillid(ipo); otos = 0; ip_ecn_ingress(ECN_ALLOWED, &otos, &itos); diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c index e23d2e007a19..3d62cf5ef6dd 100644 --- a/sys/netpfil/pf/if_pfsync.c +++ b/sys/netpfil/pf/if_pfsync.c @@ -1538,7 +1538,7 @@ pfsync_sendout(int schedswi) offset = sizeof(*ip); ip->ip_len = htons(m->m_pkthdr.len); - ip->ip_id = htons(ip_randomid()); + ip_fillid(ip); /* build the pfsync header */ ph = (struct pfsync_header *)(m->m_data + offset); diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c index 01c0958f2141..5ffb86dd6ad2 100644 --- a/sys/netpfil/pf/pf_norm.c +++ b/sys/netpfil/pf/pf_norm.c @@ -2271,9 +2271,9 @@ pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos) /* random-id, but not for fragments */ if (flags & PFRULE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) { - u_int16_t ip_id = h->ip_id; + uint16_t ip_id = h->ip_id; - h->ip_id = ip_randomid(); + ip_fillid(h); h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0); } }