Add TCP Appropriate Byte Counting (RFC 3465) support to the kernel.

The new behaviour is on by default. It can be disabled by setting the
net.inet.tcp.rfc3465 sysctl to 0, which restores the previous behaviour.

The patch adds a field to struct tcpcb in sys/netinet/tcp_var.h, which
breaks the ABI. Bump __FreeBSD_version to 800061 accordingly. User space
tools that rely on the size of struct tcpcb (e.g. sockstat) need to be
recompiled.

Reviewed by:	rpaulo, gnn
Approved by:	gnn, kmacy (mentors)
Sponsored by:	FreeBSD Foundation
This commit is contained in:
lstewart 2009-01-15 06:44:22 +00:00
parent 8e6c149fe3
commit d5deb43d0f
7 changed files with 75 additions and 11 deletions

View File

@ -22,6 +22,12 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.x IS SLOW:
to maximize performance. (To disable malloc debugging, run
ln -s aj /etc/malloc.conf.)
20090115:
TCP Appropriate Byte Counting (RFC 3465) support has been added to the
kernel. A new field in struct tcpcb breaks the ABI, so
__FreeBSD_version has been bumped to 800061. User space tools that
rely on the size of struct tcpcb in tcp_var.h (e.g. sockstat) need to
be recompiled.
20081225:
ng_tty(4) module updated to match the new TTY subsystem.
Due to API change, user-level applications must be updated.

View File

@ -117,6 +117,8 @@ int tcp_insecure_rst;
int tcp_do_autorcvbuf;
int tcp_autorcvbuf_inc;
int tcp_autorcvbuf_max;
int tcp_do_rfc3465;
int tcp_abc_l_var;
#endif
SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_inet_tcp, TCPCTL_STATS, stats,
@ -144,6 +146,13 @@ SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_RW,
tcp_do_rfc3390, 0,
"Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_RW,
tcp_do_rfc3465, 0,
"Enable RFC 3465 (Appropriate Byte Counting)");
SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp, OID_AUTO, abc_l_var, CTLFLAG_RW,
tcp_abc_l_var, 2,
"Cap the max cwnd increment during slow-start to this number of segments");
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn, CTLFLAG_RW, 0, "TCP ECN");
SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_ecn, OID_AUTO, enable,
CTLFLAG_RW, tcp_do_ecn, 0, "TCP ECN support");
@ -2293,20 +2302,59 @@ process_ACK:
/*
* When new data is acked, open the congestion window.
* If the window gives us less than ssthresh packets
* in flight, open exponentially (maxseg per packet).
* Otherwise open linearly: maxseg per window
* (maxseg^2 / cwnd per packet).
* If cwnd > maxseg^2, fix the cwnd increment at 1 byte
* to avoid capping cwnd (as suggested in RFC2581).
* Method depends on which congestion control state we're
* in (slow start or cong avoid) and if ABC (RFC 3465) is
* enabled.
*
* slow start: cwnd <= ssthresh
* cong avoid: cwnd > ssthresh
*
* slow start and ABC (RFC 3465):
* Grow cwnd exponentially by the amount of data
* ACKed capping the max increment per ACK to
* (abc_l_var * maxseg) bytes.
*
* slow start without ABC (RFC 2581):
* Grow cwnd exponentially by maxseg per ACK.
*
* cong avoid and ABC (RFC 3465):
* Grow cwnd linearly by maxseg for each cwnd worth of
* ACKed data (i.e. approximately maxseg per RTT).
*
* cong avoid without ABC (RFC 2581):
* Grow cwnd linearly by approximately maxseg per RTT using
* maxseg^2 / cwnd per ACK as the increment.
* If cwnd > maxseg^2, fix the cwnd increment at 1 byte to
* avoid capping cwnd.
*/
if ((!V_tcp_do_newreno && !(tp->t_flags & TF_SACK_PERMIT)) ||
!IN_FASTRECOVERY(tp)) {
u_int cw = tp->snd_cwnd;
u_int incr = tp->t_maxseg;
if (cw > tp->snd_ssthresh)
incr = max((incr * incr / cw), 1);
tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale);
/* In congestion avoidance? */
if (cw > tp->snd_ssthresh) {
if (V_tcp_do_rfc3465) {
tp->t_bytes_acked += acked;
if (tp->t_bytes_acked >= tp->snd_cwnd)
tp->t_bytes_acked -= cw;
else
incr = 0;
}
else
incr = max((incr * incr / cw), 1);
/*
* In slow-start with ABC enabled and no RTO in sight?
* (Must not use abc_l_var > 1 if slow starting after an
* RTO. On RTO, snd_nxt = snd_una, so the snd_nxt ==
* snd_max check is sufficient to handle this).
*/
} else if (V_tcp_do_rfc3465 &&
tp->snd_nxt == tp->snd_max)
incr = min(acked,
V_tcp_abc_l_var * tp->t_maxseg);
/* ABC is on by default, so (incr == 0) frequently. */
if (incr > 0)
tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale);
}
SOCKBUF_LOCK(&so->so_snd);
if (acked > so->so_snd.sb_cc) {
@ -2328,8 +2376,10 @@ process_ACK:
tp->snd_recover = th->th_ack - 1;
if ((V_tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) &&
IN_FASTRECOVERY(tp) &&
SEQ_GEQ(th->th_ack, tp->snd_recover))
SEQ_GEQ(th->th_ack, tp->snd_recover)) {
EXIT_FASTRECOVERY(tp);
tp->t_bytes_acked = 0;
}
tp->snd_una = th->th_ack;
if (tp->t_flags & TF_SACK_PERMIT) {
if (SEQ_GT(tp->snd_una, tp->snd_recover))

View File

@ -316,6 +316,8 @@ tcp_init(void)
V_tcp_do_autorcvbuf = 1;
V_tcp_autorcvbuf_inc = 16*1024;
V_tcp_autorcvbuf_max = 256*1024;
V_tcp_do_rfc3465 = 1;
V_tcp_abc_l_var = 2;
V_tcp_mssdflt = TCP_MSS;
#ifdef INET6

View File

@ -587,6 +587,7 @@ tcp_timer_rexmt(void * xtp)
tp->t_dupacks = 0;
}
EXIT_FASTRECOVERY(tp);
tp->t_bytes_acked = 0;
(void) tcp_output(tp);
out:

View File

@ -189,6 +189,7 @@ struct tcpcb {
void *t_pspare[3]; /* toe usrreqs / toepcb * / congestion algo / vimage / 1 general use */
struct toe_usrreqs *t_tu; /* offload operations vector */
void *t_toe; /* TOE pcb pointer */
int t_bytes_acked; /* # bytes acked during current RTT */
};
/*

View File

@ -127,6 +127,8 @@ struct vnet_inet {
int _drop_synfin;
int _tcp_do_rfc3042;
int _tcp_do_rfc3390;
int _tcp_do_rfc3465;
int _tcp_abc_l_var;
int _tcp_do_ecn;
int _tcp_ecn_maxretries;
int _tcp_insecure_rst;
@ -291,6 +293,7 @@ extern struct vnet_inet vnet_inet_0;
#define V_subnetsarelocal VNET_INET(subnetsarelocal)
#define V_tcb VNET_INET(tcb)
#define V_tcbinfo VNET_INET(tcbinfo)
#define V_tcp_abc_l_var VNET_INET(tcp_abc_l_var)
#define V_tcp_autorcvbuf_inc VNET_INET(tcp_autorcvbuf_inc)
#define V_tcp_autorcvbuf_max VNET_INET(tcp_autorcvbuf_max)
#define V_tcp_autosndbuf_inc VNET_INET(tcp_autosndbuf_inc)
@ -303,6 +306,7 @@ extern struct vnet_inet vnet_inet_0;
#define V_tcp_do_rfc1323 VNET_INET(tcp_do_rfc1323)
#define V_tcp_do_rfc3042 VNET_INET(tcp_do_rfc3042)
#define V_tcp_do_rfc3390 VNET_INET(tcp_do_rfc3390)
#define V_tcp_do_rfc3465 VNET_INET(tcp_do_rfc3465)
#define V_tcp_do_sack VNET_INET(tcp_do_sack)
#define V_tcp_do_tso VNET_INET(tcp_do_tso)
#define V_tcp_ecn_maxretries VNET_INET(tcp_ecn_maxretries)

View File

@ -57,7 +57,7 @@
* is created, otherwise 1.
*/
#undef __FreeBSD_version
#define __FreeBSD_version 800060 /* Master, propagated to newvers */
#define __FreeBSD_version 800061 /* Master, propagated to newvers */
#ifndef LOCORE
#include <sys/types.h>