From 5f311da2ccb6c216b79049172be840af4778129a Mon Sep 17 00:00:00 2001 From: Mike Silbersack Date: Sun, 2 Jan 2005 01:50:57 +0000 Subject: [PATCH] Port randomization leads to extremely fast port reuse at high connection rates, which is causing problems for some users. To retain the security advantage of random ports and ensure correct operation for high connection rate users, disable port randomization during periods of high connection rates. Whenever the connection rate exceeds randomcps (10 by default), randomization will be disabled for randomtime (45 by default) seconds. These thresholds may be tuned via sysctl. Many thanks to Igor Sysoev, who proved the necessity of this change and tested many preliminary versions of the patch. MFC After: 20 seconds --- sys/netinet/in_pcb.c | 56 +++++++++++++++++++++++++++++++++++++++--- sys/netinet/in_pcb.h | 2 ++ sys/netinet/ip_input.c | 14 +++++++++++ sys/netinet/ip_var.h | 1 + 4 files changed, 69 insertions(+), 4 deletions(-) diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 11e5b0dd0d58..20a946553e30 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -59,6 +59,8 @@ #include #include #include +#include +#include #ifdef INET6 #include #include @@ -97,8 +99,13 @@ int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ int ipport_reservedhigh = IPPORT_RESERVED - 1; /* 1023 */ int ipport_reservedlow = 0; -/* Shall we allocate ephemeral ports in random order? */ -int ipport_randomized = 1; +/* Variables dealing with random ephemeral port allocation. 
*/ +int ipport_randomized = 1; /* user controlled via sysctl */ +int ipport_randomcps = 10; /* user controlled via sysctl */ +int ipport_randomtime = 45; /* user controlled via sysctl */ +int ipport_stoprandom = 0; /* toggled by ipport_tick */ +int ipport_tcpallocs; +int ipport_tcplastcount; #define RANGECHK(var, min, max) \ if ((var) < (min)) { (var) = (min); } \ @@ -143,6 +150,10 @@ SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow, CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedlow, 0, ""); SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomized, CTLFLAG_RW, &ipport_randomized, 0, ""); +SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomcps, + CTLFLAG_RW, &ipport_randomcps, 0, ""); +SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, + CTLFLAG_RW, &ipport_randomtime, 0, ""); /* * in_pcb.c: manage the Protocol Control Blocks. @@ -266,6 +277,7 @@ in_pcbbind_setup(inp, nam, laddrp, lportp, cred) u_short lport = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); int error, prison = 0; + int dorandom; INP_INFO_WLOCK_ASSERT(pcbinfo); INP_LOCK_ASSERT(inp); @@ -393,6 +405,20 @@ in_pcbbind_setup(inp, nam, laddrp, lportp, cred) last = ipport_lastauto; lastport = &pcbinfo->lastport; } + /* + * For UDP, use random port allocation as long as the user + * allows it. For TCP (and as of yet unknown) connections, + * use random port allocation only if the user allows it AND + * ipport_tick allows it. + */ + if (ipport_randomized && + (!ipport_stoprandom || pcbinfo == &udbinfo)) + dorandom = 1; + else + dorandom = 0; + /* Make sure to not include UDP port allocations in the count. */ + if (pcbinfo != &udbinfo) + ipport_tcpallocs++; /* * Simple check to ensure all ports are not used up causing * a deadlock here. 
@@ -404,7 +430,7 @@ in_pcbbind_setup(inp, nam, laddrp, lportp, cred) /* * counting down */ - if (ipport_randomized) + if (dorandom) *lastport = first - (arc4random() % (first - last)); count = first - last; @@ -422,7 +448,7 @@ in_pcbbind_setup(inp, nam, laddrp, lportp, cred) /* * counting up */ - if (ipport_randomized) + if (dorandom) *lastport = first + (arc4random() % (last - first)); count = last - first; @@ -1181,3 +1207,25 @@ in_pcbsosetlabel(so) INP_UNLOCK(inp); #endif } + +/* + * ipport_tick runs once per second, determining if random port + * allocation should be continued. If more than ipport_randomcps + * ports have been allocated in the last second, then we return to + * sequential port allocation. We return to random allocation only + * once we drop below ipport_randomcps for at least ipport_randomtime seconds. + */ + +void +ipport_tick(xtp) + void *xtp; +{ + if (ipport_tcpallocs > ipport_tcplastcount + ipport_randomcps) { + ipport_stoprandom = ipport_randomtime; + } else { + if (ipport_stoprandom > 0) + ipport_stoprandom--; + } + ipport_tcplastcount = ipport_tcpallocs; + callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL); +} diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 2e8228eed391..6983480acdfb 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -333,6 +333,7 @@ extern int ipport_firstauto; extern int ipport_lastauto; extern int ipport_hifirstauto; extern int ipport_hilastauto; +extern struct callout ipport_tick_callout; void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *); int in_pcballoc(struct socket *, struct inpcbinfo *, const char *); @@ -362,6 +363,7 @@ struct sockaddr * in_sockaddr(in_port_t port, struct in_addr *addr); void in_pcbsosetlabel(struct socket *so); void in_pcbremlists(struct inpcb *inp); +void ipport_tick(void *xtp); #endif /* _KERNEL */ #endif /* !_NETINET_IN_PCB_H_ */ diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index b950504b16f0..07c7ba4ed9f7 100644 --- a/sys/netinet/ip_input.c +++ 
b/sys/netinet/ip_input.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -186,6 +187,7 @@ SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW, static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH]; struct mtx ipqlock; +struct callout ipport_tick_callout; #define IPQ_LOCK() mtx_lock(&ipqlock) #define IPQ_UNLOCK() mtx_unlock(&ipqlock) @@ -279,6 +281,12 @@ ip_init() maxnipq = nmbclusters / 32; maxfragsperpacket = 16; + /* Start ipport_tick. */ + callout_init(&ipport_tick_callout, CALLOUT_MPSAFE); + ipport_tick(NULL); + EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL, + SHUTDOWN_PRI_DEFAULT); + /* Initialize various other remaining things. */ ip_id = time_second & 0xffff; ipintrq.ifq_maxlen = ipqmaxlen; @@ -286,6 +294,12 @@ ip_init() netisr_register(NETISR_IP, ip_input, &ipintrq, NETISR_MPSAFE); } +void ip_fini(xtp) + void *xtp; +{ + callout_stop(&ipport_tick_callout); +} + /* * Ip input routine. Checksum and byte swap header. If fragmented * try to reassemble. Process options. Pass to next level. diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index b4c99d2d09dd..c0da82f6d5e8 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -159,6 +159,7 @@ extern struct pr_usrreqs rip_usrreqs; int ip_ctloutput(struct socket *, struct sockopt *sopt); void ip_drain(void); +void ip_fini(void *xtp); int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, u_long if_hwassist_flags, int sw_csum); void ip_freemoptions(struct ip_moptions *);