From 9d3e0245b758d8331d329301421f2d1a1e67e5ae Mon Sep 17 00:00:00 2001 From: hiren Date: Tue, 27 Oct 2015 09:43:05 +0000 Subject: [PATCH] Add sysctl tunable net.inet.tcp.initcwnd_segments to specify initial congestion window in number of segments on fly. It is set to 10 segments by default. Remove net.inet.tcp.experimental.initcwnd10 which is now redundant. Also remove the parent node net.inet.tcp.experimental as it's not needed anymore and also because it was not well thought out. Differential Revision: https://reviews.freebsd.org/D3858 In collaboration with: lstewart Reviewed by: gnn (prev version), rwatson, allanjude, wblock (man page) MFC after: 2 weeks Relnotes: yes Sponsored by: Limelight Networks --- share/man/man4/tcp.4 | 13 ++++++++++++- sys/netinet/tcp_input.c | 18 ++++++++---------- sys/netinet/tcp_var.h | 4 ++-- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4 index 351e4b4fa7da..8c5887fdcfde 100644 --- a/share/man/man4/tcp.4 +++ b/share/man/man4/tcp.4 @@ -34,7 +34,7 @@ .\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd October 13, 2014 +.Dd October 27, 2015 .Dt TCP 4 .Os .Sh NAME @@ -454,6 +454,17 @@ code. For this reason, we use 200ms of slop and a near-0 minimum, which gives us an effective minimum of 200ms (similar to .Tn Linux ) . +.It Va initcwnd_segments +Enable the ability to specify initial congestion window in number of segments. +The default value is 10 as suggested by RFC 6928. +Changing the value on fly would not affect connections using congestion window +from the hostcache. +Caution: +This regulates the burst of packets allowed to be sent in the first RTT. +The value should be relative to the link capacity. +Start with small values for lower-capacity links. +Large bursts can cause buffer overruns and packet drops if routers have small +buffers or the link is experiencing congestion. .It Va rfc3042 Enable the Limited Transmit algorithm as described in RFC 3042. It helps avoid timeouts on lossy links and also when the congestion window diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 64971f3da293..8f5662033048 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -159,13 +159,10 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_rfc3390), 0, "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)"); -SYSCTL_NODE(_net_inet_tcp, OID_AUTO, experimental, CTLFLAG_RW, 0, - "Experimental TCP extensions"); - -VNET_DEFINE(int, tcp_do_initcwnd10) = 1; -SYSCTL_INT(_net_inet_tcp_experimental, OID_AUTO, initcwnd10, CTLFLAG_VNET | CTLFLAG_RW, - &VNET_NAME(tcp_do_initcwnd10), 0, - "Enable RFC 6928 (Increasing initial CWND to 10)"); +VNET_DEFINE(int, tcp_initcwnd_segments) = 10; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, initcwnd_segments, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_initcwnd_segments), 0, + "Slow-start flight size (initial congestion window) in number of segments"); VNET_DEFINE(int, tcp_do_rfc3465) = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_VNET | CTLFLAG_RW, @@ -364,6 +361,7 @@ cc_conn_init(struct tcpcb *tp) * RFC5681 Section 3.1 specifies the default conservative values. * RFC3390 specifies slightly more aggressive values. * RFC6928 increases it to ten segments. + * Support for user specified value for initial flight size. * * If a SYN or SYN/ACK was lost and retransmitted, we have to * reduce the initial CWND to one segment as congestion is likely @@ -371,9 +369,9 @@ cc_conn_init(struct tcpcb *tp) */ if (tp->snd_cwnd == 1) tp->snd_cwnd = tp->t_maxseg; /* SYN(-ACK) lost */ - else if (V_tcp_do_initcwnd10) - tp->snd_cwnd = min(10 * tp->t_maxseg, - max(2 * tp->t_maxseg, 14600)); + else if (V_tcp_initcwnd_segments) + tp->snd_cwnd = min(V_tcp_initcwnd_segments * tp->t_maxseg, + max(2 * tp->t_maxseg, V_tcp_initcwnd_segments * 1460)); else if (V_tcp_do_rfc3390) tp->snd_cwnd = min(4 * tp->t_maxseg, max(2 * tp->t_maxseg, 4380)); diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 07a28feb4956..2a2a148f6b7a 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -621,7 +621,7 @@ VNET_DECLARE(int, tcp_mssdflt); /* XXX */ VNET_DECLARE(int, tcp_minmss); VNET_DECLARE(int, tcp_delack_enabled); VNET_DECLARE(int, tcp_do_rfc3390); -VNET_DECLARE(int, tcp_do_initcwnd10); +VNET_DECLARE(int, tcp_initcwnd_segments); VNET_DECLARE(int, tcp_sendspace); VNET_DECLARE(int, tcp_recvspace); VNET_DECLARE(int, path_mtu_discovery); @@ -633,7 +633,7 @@ VNET_DECLARE(int, tcp_abc_l_var); #define V_tcp_minmss VNET(tcp_minmss) #define V_tcp_delack_enabled VNET(tcp_delack_enabled) #define V_tcp_do_rfc3390 VNET(tcp_do_rfc3390) -#define V_tcp_do_initcwnd10 VNET(tcp_do_initcwnd10) +#define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments) #define V_tcp_sendspace VNET(tcp_sendspace) #define V_tcp_recvspace VNET(tcp_recvspace) #define V_path_mtu_discovery VNET(path_mtu_discovery)