o Use new function ip_fillid() in all places throughout the kernel,
  where we want to create a new IP datagram.
o Add support for RFC6864, which allows setting the IP ID of atomic IP
  datagrams to any value, to improve performance. The behaviour is
  controlled by the net.inet.ip.rfc6864 sysctl knob, which is enabled by
  default.
o When we do generate an IP ID, use counter(9) to improve performance.
o Gather all code related to IP ID into ip_id.c.

Differential Revision:		https://reviews.freebsd.org/D2177
Reviewed by:			adrian, cy, rpaulo
Tested by:			Emeric POUPON <emeric.poupon stormshield.eu>
Sponsored by:			Netflix
Sponsored by:			Nginx, Inc.
Relnotes:			yes
This commit is contained in:
Gleb Smirnoff 2015-04-01 22:26:39 +00:00
parent 703e4974aa
commit 6d947416cc
18 changed files with 131 additions and 102 deletions

View File

@ -864,3 +864,11 @@ void fixv4sums(m, ip)
*(u_short *)csump = fr_cksum(&tmp, ip, p, hdr);
}
}
/*
 * Minimal ip_fillid(): hand out IDs from a function-local 16-bit sequence.
 *
 * The sequence starts at zero (static storage) and wraps naturally at the
 * width of uint16_t.  The value is stored into the header as-is; no byte
 * order conversion is applied here.
 */
void
ip_fillid(struct ip *ip)
{
	static uint16_t next_id;

	ip->ip_id = next_id;
	next_id++;
}

View File

@ -28,7 +28,7 @@
.\" From: @(#)inet.4 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
.Dd January 26, 2012
.Dd April 2, 2015
.Dt INET 4
.Os
.Sh NAME
@ -226,12 +226,24 @@ This
.Xr sysctl 8
variable affects packets destined for a local host as well as packets
forwarded to some other host.
.It Va ip.rfc6864
Boolean: control IP IDs generation behaviour.
True value enables RFC6864 support, which specifies that IP ID field of
.Em atomic
datagrams can be set to any value.
The
.Fx implementation sets it to zero.
Enabled by default.
.It Va ip.random_id
Boolean: control IP IDs generation behaviour.
Setting this
.Xr sysctl 8
to non-zero causes the ID field in IP packets to be randomized instead of
incremented by 1 with each packet generated.
to 1 causes the ID field in
.Em non-atomic
IP datagrams (or all IP datagrams, if
.Va ip.rfc6864
is disabled) to be randomized instead of incremented by 1 with each packet
generated.
This closes a minor information leak which allows remote observers to
determine the rate of packet generation on the machine by watching the
counter.

View File

@ -6086,23 +6086,24 @@ ipf_updateipid(fin)
u_32_t sumd, sum;
ip_t *ip;
ip = fin->fin_ip;
ido = ntohs(ip->ip_id);
if (fin->fin_off != 0) {
sum = ipf_frag_ipidknown(fin);
if (sum == 0xffffffff)
return -1;
sum &= 0xffff;
id = (u_short)sum;
ip->ip_id = htons(id);
} else {
id = ipf_nextipid(fin);
if (fin->fin_off == 0 && (fin->fin_flx & FI_FRAG) != 0)
ip_fillid(ip);
id = ntohs(ip->ip_id);
if ((fin->fin_flx & FI_FRAG) != 0)
(void) ipf_frag_ipidnew(fin, (u_32_t)id);
}
ip = fin->fin_ip;
ido = ntohs(ip->ip_id);
if (id == ido)
return 0;
ip->ip_id = htons(id);
CALC_SUMD(ido, id, sumd); /* DESTRUCTIVE MACRO! id,ido change */
sum = (~ntohs(ip->ip_sum)) & 0xffff;
sum += sumd;

View File

@ -1718,6 +1718,7 @@ extern int ipfioctl __P((ipf_main_softc_t *, int, ioctlcmd_t,
extern void m_freem __P((mb_t *));
extern size_t msgdsize __P((mb_t *));
extern int bcopywrap __P((void *, void *, size_t));
extern void ip_fillid(struct ip *);
#else /* #ifndef _KERNEL */
# if defined(__NetBSD__) && defined(PFIL_HOOKS)
extern void ipfilterattach __P((int));
@ -1932,7 +1933,6 @@ extern int ipf_matchtag __P((ipftag_t *, ipftag_t *));
extern int ipf_matchicmpqueryreply __P((int, icmpinfo_t *,
struct icmp *, int));
extern u_32_t ipf_newisn __P((fr_info_t *));
extern u_short ipf_nextipid __P((fr_info_t *));
extern u_int ipf_pcksum __P((fr_info_t *, int, u_int));
extern void ipf_rule_expire __P((ipf_main_softc_t *));
extern int ipf_scanlist __P((fr_info_t *, u_32_t));

View File

@ -97,7 +97,6 @@ MALLOC_DEFINE(M_IPFILTER, "ipfilter", "IP Filter packet filter data structures")
# endif
static u_short ipid = 0;
static int (*ipf_savep) __P((void *, ip_t *, int, void *, int, struct mbuf **));
static int ipf_send_ip __P((fr_info_t *, mb_t *));
static void ipf_timer_func __P((void *arg));
@ -231,8 +230,6 @@ ipfattach(softc)
if (softc->ipf_control_forwarding & 1)
V_ipforwarding = 1;
ipid = 0;
SPL_X(s);
#if 0
softc->ipf_slow_ch = timeout(ipf_timer_func, softc,
@ -1074,31 +1071,6 @@ ipf_newisn(fin)
}
/* ------------------------------------------------------------------------ */
/* Function: ipf_nextipid */
/* Returns: int - 0 == success, -1 == error (packet should be droppped) */
/* Parameters: fin(I) - pointer to packet information */
/* */
/* Returns the next IPv4 ID to use for this packet. */
/* ------------------------------------------------------------------------ */
u_short
ipf_nextipid(fin)
fr_info_t *fin;
{
u_short id;
#ifndef RANDOM_IP_ID
MUTEX_ENTER(&ipfmain.ipf_rw);
id = ipid++;
MUTEX_EXIT(&ipfmain.ipf_rw);
#else
id = ip_randomid();
#endif
return id;
}
INLINE int
ipf_checkv4sum(fin)
fr_info_t *fin;

View File

@ -5221,7 +5221,7 @@ ipf_nat_out(fin, nat, natadd, nflags)
}
ip = MTOD(m, ip_t *);
ip->ip_id = htons(ipf_nextipid(fin));
ip_fillid(ip);
s2 = ntohs(ip->ip_id);
s1 = ip->ip_len;
@ -5666,7 +5666,7 @@ ipf_nat_in(fin, nat, natadd, nflags)
}
ip = MTOD(m, ip_t *);
ip->ip_id = htons(ipf_nextipid(fin));
ip_fillid(ip);
sum1 = ntohs(ip->ip_len);
ip->ip_len = ntohs(ip->ip_len);
ip->ip_len += fin->fin_plen;

View File

@ -838,11 +838,11 @@ carp_send_ad_locked(struct carp_softc *sc)
ip->ip_hl = sizeof(*ip) >> 2;
ip->ip_tos = IPTOS_LOWDELAY;
ip->ip_len = htons(len);
ip->ip_id = ip_newid();
ip->ip_off = htons(IP_DF);
ip->ip_ttl = CARP_DFLTTL;
ip->ip_p = IPPROTO_CARP;
ip->ip_sum = 0;
ip_fillid(ip);
bzero(&sa, sizeof(sa));
sa.sa_family = AF_INET;

View File

@ -145,7 +145,7 @@ in_gre_output(struct mbuf *m, int af, int hlen)
#ifdef INET6
case AF_INET6:
gi->gi_ip.ip_tos = 0; /* XXX */
gi->gi_ip.ip_id = ip_newid();
ip_fillid(&gi->gi_ip);
break;
#endif
}

View File

@ -74,26 +74,37 @@ __FBSDID("$FreeBSD$");
* enabled.
*/
#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/libkern.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/random.h>
#include <sys/systm.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/bitstring.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
static MALLOC_DEFINE(M_IPID, "ipid", "randomized ip id state");
/*
* By default we generate IP ID only for non-atomic datagrams, as
* suggested by RFC6864. We use per-CPU counter for that, or if
* user wants to, we can turn on random ID generation.
*/
static VNET_DEFINE(int, ip_rfc6864) = 1;
static VNET_DEFINE(int, ip_do_randomid) = 0;
#define V_ip_rfc6864 VNET(ip_rfc6864)
#define V_ip_do_randomid VNET(ip_do_randomid)
/*
* Random ID state engine.
*/
static MALLOC_DEFINE(M_IPID, "ipid", "randomized ip id state");
static VNET_DEFINE(uint16_t *, id_array);
static VNET_DEFINE(bitstr_t *, id_bits);
static VNET_DEFINE(int, array_ptr);
@ -109,12 +120,27 @@ static VNET_DEFINE(struct mtx, ip_id_mtx);
#define V_random_id_total VNET(random_id_total)
#define V_ip_id_mtx VNET(ip_id_mtx)
static void ip_initid(int);
/*
* Non-random ID state engine is simply a per-cpu counter.
*/
static VNET_DEFINE(counter_u64_t, ip_id);
#define V_ip_id VNET(ip_id)
static int sysctl_ip_randomid(SYSCTL_HANDLER_ARGS);
static int sysctl_ip_id_change(SYSCTL_HANDLER_ARGS);
static void ip_initid(int);
static uint16_t ip_randomid(void);
static void ipid_sysinit(void);
static void ipid_sysuninit(void);
SYSCTL_DECL(_net_inet_ip);
SYSCTL_PROC(_net_inet_ip, OID_AUTO, random_id,
CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ip_do_randomid), 0, sysctl_ip_randomid, "IU",
"Assign random ip_id values");
SYSCTL_INT(_net_inet_ip, OID_AUTO, rfc6864, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ip_rfc6864), 0,
"Use constant IP ID for atomic datagrams");
SYSCTL_PROC(_net_inet_ip, OID_AUTO, random_id_period,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_VNET,
&VNET_NAME(array_size), 0, sysctl_ip_id_change, "IU", "IP ID Array size");
@ -124,6 +150,26 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_collisions,
SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_total, CTLFLAG_RD | CTLFLAG_VNET,
&VNET_NAME(random_id_total), 0, "Count of IP IDs created");
/*
 * Sysctl handler for the random_id knob.
 *
 * Reports the current value of V_ip_do_randomid and, on a write, accepts
 * only 0 or 1 (anything else yields EINVAL).  Writing the value that is
 * already in effect is a no-op.  Switching from 0 to 1 calls
 * ip_initid(8192) before the new setting is published.
 */
static int
sysctl_ip_randomid(SYSCTL_HANDLER_ARGS)
{
	int requested, rc;

	/* Hand a private copy to the generic handler; commit only if valid. */
	requested = V_ip_do_randomid;
	rc = sysctl_handle_int(oidp, &requested, 0, req);
	if (rc != 0 || req->newptr == NULL)
		return (rc);

	if (requested < 0 || requested > 1)
		return (EINVAL);
	if (requested == V_ip_do_randomid)
		return (0);

	/* Turning the feature on: set up the random ID state first. */
	if (V_ip_do_randomid == 0 && requested == 1)
		ip_initid(8192);

	/*
	 * Turning the feature off deliberately leaves the state allocated:
	 * freeing it here would race with users of that memory.
	 */
	V_ip_do_randomid = requested;
	return (0);
}
static int
sysctl_ip_id_change(SYSCTL_HANDLER_ARGS)
{
@ -164,7 +210,7 @@ ip_initid(int new_size)
mtx_unlock(&V_ip_id_mtx);
}
uint16_t
static uint16_t
ip_randomid(void)
{
uint16_t new_id;
@ -191,12 +237,34 @@ ip_randomid(void)
return (new_id);
}
/*
 * Fill in the ip_id field of an IPv4 header that is about to be sent.
 *
 * RFC6864 Section 4 distinguishes:
 *
 *   o Atomic datagrams:     (DF==1) && (MF==0) && (frag_offset==0)
 *   o Non-atomic datagrams: (DF==0) || (MF==1) || (frag_offset>0)
 *
 * Selection order:
 *   1. RFC6864 knob on and DF set in ip_off  -> ID is zero.
 *   2. Random ID knob on                     -> ID from ip_randomid().
 *   3. Otherwise                             -> bump the V_ip_id counter
 *      and use the low 16 bits of the value read back through
 *      zpcpu_get(), converted with htons().
 *
 * ip->ip_off is examined in network byte order, so callers must have set
 * it (in particular the DF bit) before calling.
 *
 * NOTE(review): only DF is tested here, not MF or the fragment offset
 * listed in the definition above -- presumably callers invoke this
 * before any fragmentation takes place; confirm.
 */
void
ip_fillid(struct ip *ip)
{

	if (V_ip_rfc6864 && (ip->ip_off & htons(IP_DF)) == htons(IP_DF)) {
		ip->ip_id = 0;
		return;
	}

	if (V_ip_do_randomid) {
		ip->ip_id = ip_randomid();
		return;
	}

	/*
	 * NOTE(review): the increment and the read below are two separate
	 * steps on the counter -- presumably an occasional stale or repeated
	 * value is acceptable for an IP ID; confirm.
	 */
	counter_u64_add(V_ip_id, 1);
	ip->ip_id = htons((*(uint64_t *)zpcpu_get(V_ip_id)) & 0xffff);
}
static void
ipid_sysinit(void)
{
mtx_init(&V_ip_id_mtx, "ip_id_mtx", NULL, MTX_DEF);
ip_initid(8192);
V_ip_id = counter_u64_alloc(M_WAITOK);
for (int i = 0; i < mp_ncpus; i++)
arc4rand(zpcpu_get_cpu(V_ip_id, i), sizeof(uint64_t), 0);
}
VNET_SYSINIT(ip_id, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipid_sysinit, NULL);
@ -205,7 +273,10 @@ ipid_sysuninit(void)
{
mtx_destroy(&V_ip_id_mtx);
free(V_id_array, M_IPID);
free(V_id_bits, M_IPID);
if (V_id_array != NULL) {
free(V_id_array, M_IPID);
free(V_id_bits, M_IPID);
}
counter_u64_free(V_ip_id);
}
VNET_SYSUNINIT(ip_id, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipid_sysuninit, NULL);

View File

@ -105,11 +105,6 @@ SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_R
&VNET_NAME(ipsendredirects), 0,
"Enable sending IP redirects");
VNET_DEFINE(int, ip_do_randomid);
SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ip_do_randomid), 0,
"Assign random ip_id values");
/*
* XXX - Setting ip_checkinterface mostly implements the receive side of
* the Strong ES model described in RFC 1122, but since the routing table
@ -331,8 +326,6 @@ ip_init(void)
struct protosw *pr;
int i;
V_ip_id = time_second & 0xffff;
TAILQ_INIT(&V_in_ifaddrhead);
V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask);

View File

@ -2501,7 +2501,6 @@ pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy,
*/
ip_outer = mtod(mb_first, struct ip *);
*ip_outer = pim_encap_iphdr;
ip_outer->ip_id = ip_newid();
ip_outer->ip_len = htons(len + sizeof(pim_encap_iphdr) +
sizeof(pim_encap_pimhdr));
ip_outer->ip_src = V_viftable[vifi].v_lcl_addr;
@ -2513,6 +2512,7 @@ pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy,
ip_outer->ip_tos = ip->ip_tos;
if (ip->ip_off & htons(IP_DF))
ip_outer->ip_off |= htons(IP_DF);
ip_fillid(ip_outer);
pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer
+ sizeof(pim_encap_iphdr));
*pimhdr = pim_encap_pimhdr;

View File

@ -91,8 +91,6 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
VNET_DEFINE(uint32_t, ip_id);
#ifdef MBUF_STRESS_TEST
static int mbuf_frag_size = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
@ -174,21 +172,10 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
ip_len = ntohs(ip->ip_len);
ip_off = ntohs(ip->ip_off);
/*
* Fill in IP header. If we are not allowing fragmentation,
* then the ip_id field is meaningless, but we don't set it
* to zero. Doing so causes various problems when devices along
* the path (routers, load balancers, firewalls, etc.) illegally
* disable DF on our packet. Note that a 16-bit counter
* will wrap around in less than 10 seconds at 100 Mbit/s on a
* medium with MTU 1500. See Steven M. Bellovin, "A Technique
* for Counting NATted Hosts", Proc. IMW'02, available at
* <http://www.cs.columbia.edu/~smb/papers/fnat.pdf>.
*/
if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
ip->ip_v = IPVERSION;
ip->ip_hl = hlen >> 2;
ip->ip_id = ip_newid();
ip_fillid(ip);
IPSTAT_INC(ips_localout);
} else {
/* Header already set, fetch hlen from there */

View File

@ -174,7 +174,6 @@ struct inpcb;
struct route;
struct sockopt;
VNET_DECLARE(uint32_t, ip_id); /* ip packet ctr, for ids */
VNET_DECLARE(int, ip_defttl); /* default IP ttl */
VNET_DECLARE(int, ipforwarding); /* ip forwarding */
#ifdef IPSTEALTH
@ -228,7 +227,7 @@ struct in_ifaddr *
void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
struct mbuf *);
void ip_slowtimo(void);
uint16_t ip_randomid(void);
void ip_fillid(struct ip *);
int rip_ctloutput(struct socket *, struct sockopt *);
void rip_ctlinput(int, struct sockaddr *, void *);
void rip_init(void);
@ -302,22 +301,6 @@ extern int (*ng_ipfw_input_p)(struct mbuf **, int,
extern int (*ip_dn_ctl_ptr)(struct sockopt *);
extern int (*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *);
VNET_DECLARE(int, ip_do_randomid);
#define V_ip_do_randomid VNET(ip_do_randomid)
static __inline uint16_t
ip_newid(void)
{
uint16_t res;
if (V_ip_do_randomid != 0)
return (ip_randomid());
else {
res = atomic_fetchadd_32(&V_ip_id, 1) & 0xFFFF;
return (htons(res));
}
}
#endif /* _KERNEL */
#endif /* !_NETINET_IP_VAR_H_ */

View File

@ -505,8 +505,12 @@ rip_output(struct mbuf *m, struct socket *so, ...)
m_freem(m);
return (EINVAL);
}
/*
* This doesn't allow application to specify ID of zero,
* but we got this limitation from the beginning of history.
*/
if (ip->ip_id == 0)
ip->ip_id = ip_newid();
ip_fillid(ip);
/*
* XXX prevent ip_output from overwriting header fields.

View File

@ -4106,7 +4106,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
ip->ip_off = htons(0);
}
/* FreeBSD has a function for ip_id's */
ip->ip_id = ip_newid();
ip_fillid(ip);
ip->ip_ttl = inp->ip_inp.inp.inp_ip_ttl;
ip->ip_len = htons(packet_length);
@ -10949,8 +10949,8 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
ip->ip_v = IPVERSION;
ip->ip_hl = (sizeof(struct ip) >> 2);
ip->ip_tos = 0;
ip->ip_id = ip_newid();
ip->ip_off = 0;
ip_fillid(ip);
ip->ip_ttl = MODULE_GLOBAL(ip_defttl);
if (port) {
ip->ip_p = IPPROTO_UDP;

View File

@ -136,9 +136,6 @@ ipip_output(struct mbuf *m, struct ipsecrequest *isr, struct mbuf **mp,
ipo->ip_sum = 0;
ipo->ip_src = saidx->src.sin.sin_addr;
ipo->ip_dst = saidx->dst.sin.sin_addr;
ipo->ip_id = ip_newid();
/* If the inner protocol is IP... */
switch (tp) {
case IPVERSION:
@ -178,6 +175,7 @@ ipip_output(struct mbuf *m, struct ipsecrequest *isr, struct mbuf **mp,
default:
goto nofamily;
}
ip_fillid(ipo);
otos = 0;
ip_ecn_ingress(ECN_ALLOWED, &otos, &itos);

View File

@ -1538,7 +1538,7 @@ pfsync_sendout(int schedswi)
offset = sizeof(*ip);
ip->ip_len = htons(m->m_pkthdr.len);
ip->ip_id = htons(ip_randomid());
ip_fillid(ip);
/* build the pfsync header */
ph = (struct pfsync_header *)(m->m_data + offset);

View File

@ -2271,9 +2271,9 @@ pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos)
/* random-id, but not for fragments */
if (flags & PFRULE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) {
u_int16_t ip_id = h->ip_id;
uint16_t ip_id = h->ip_id;
h->ip_id = ip_randomid();
ip_fillid(h);
h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
}
}