Add TCP Appropriate Byte Counting (RFC 3465) support to kernel.
The new behaviour is on by default and can be disabled by setting the net.inet.tcp.rfc3465 sysctl to 0 to restore the previous behaviour.

The patch changes struct tcpcb in sys/netinet/tcp_var.h, which breaks the ABI. Bump __FreeBSD_version to 800061 accordingly. User space tools that rely on the size of struct tcpcb (e.g. sockstat) need to be recompiled.

Reviewed by: rpaulo, gnn
Approved by: gnn, kmacy (mentors)
Sponsored by: FreeBSD Foundation
This commit is contained in:
parent
8e6c149fe3
commit
d5deb43d0f
6
UPDATING
6
UPDATING
@ -22,6 +22,12 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.x IS SLOW:
|
||||
to maximize performance. (To disable malloc debugging, run
|
||||
ln -s aj /etc/malloc.conf.)
|
||||
|
||||
20090115:
|
||||
TCP Appropriate Byte Counting (RFC 3465) support added to kernel.
|
||||
New field in struct tcpcb breaks ABI, so bump __FreeBSD_version to
|
||||
800061. User space tools that rely on the size of struct tcpcb in
|
||||
tcp_var.h (e.g. sockstat) need to be recompiled.
|
||||
|
||||
20081225:
|
||||
ng_tty(4) module updated to match the new TTY subsystem.
|
||||
Due to API change, user-level applications must be updated.
|
||||
|
@ -117,6 +117,8 @@ int tcp_insecure_rst;
|
||||
int tcp_do_autorcvbuf;
|
||||
int tcp_autorcvbuf_inc;
|
||||
int tcp_autorcvbuf_max;
|
||||
int tcp_do_rfc3465;
|
||||
int tcp_abc_l_var;
|
||||
#endif
|
||||
|
||||
SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_inet_tcp, TCPCTL_STATS, stats,
|
||||
@ -144,6 +146,13 @@ SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_RW,
|
||||
tcp_do_rfc3390, 0,
|
||||
"Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
|
||||
|
||||
SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_RW,
|
||||
tcp_do_rfc3465, 0,
|
||||
"Enable RFC 3465 (Appropriate Byte Counting)");
|
||||
SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp, OID_AUTO, abc_l_var, CTLFLAG_RW,
|
||||
tcp_abc_l_var, 2,
|
||||
"Cap the max cwnd increment during slow-start to this number of segments");
|
||||
|
||||
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn, CTLFLAG_RW, 0, "TCP ECN");
|
||||
SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_ecn, OID_AUTO, enable,
|
||||
CTLFLAG_RW, tcp_do_ecn, 0, "TCP ECN support");
|
||||
@ -2293,20 +2302,59 @@ process_ACK:
|
||||
|
||||
/*
|
||||
* When new data is acked, open the congestion window.
|
||||
* If the window gives us less than ssthresh packets
|
||||
* in flight, open exponentially (maxseg per packet).
|
||||
* Otherwise open linearly: maxseg per window
|
||||
* (maxseg^2 / cwnd per packet).
|
||||
* If cwnd > maxseg^2, fix the cwnd increment at 1 byte
|
||||
* to avoid capping cwnd (as suggested in RFC2581).
|
||||
* Method depends on which congestion control state we're
|
||||
* in (slow start or cong avoid) and if ABC (RFC 3465) is
|
||||
* enabled.
|
||||
*
|
||||
* slow start: cwnd <= ssthresh
|
||||
* cong avoid: cwnd > ssthresh
|
||||
*
|
||||
* slow start and ABC (RFC 3465):
|
||||
* Grow cwnd exponentially by the amount of data
|
||||
* ACKed capping the max increment per ACK to
|
||||
* (abc_l_var * maxseg) bytes.
|
||||
*
|
||||
* slow start without ABC (RFC 2581):
|
||||
* Grow cwnd exponentially by maxseg per ACK.
|
||||
*
|
||||
* cong avoid and ABC (RFC 3465):
|
||||
* Grow cwnd linearly by maxseg per RTT for each
|
||||
* cwnd worth of ACKed data.
|
||||
*
|
||||
* cong avoid without ABC (RFC 2581):
|
||||
* Grow cwnd linearly by approximately maxseg per RTT using
|
||||
* maxseg^2 / cwnd per ACK as the increment.
|
||||
* If cwnd > maxseg^2, fix the cwnd increment at 1 byte to
|
||||
* avoid capping cwnd.
|
||||
*/
|
||||
if ((!V_tcp_do_newreno && !(tp->t_flags & TF_SACK_PERMIT)) ||
|
||||
!IN_FASTRECOVERY(tp)) {
|
||||
u_int cw = tp->snd_cwnd;
|
||||
u_int incr = tp->t_maxseg;
|
||||
if (cw > tp->snd_ssthresh)
|
||||
incr = max((incr * incr / cw), 1);
|
||||
tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale);
|
||||
/* In congestion avoidance? */
|
||||
if (cw > tp->snd_ssthresh) {
|
||||
if (V_tcp_do_rfc3465) {
|
||||
tp->t_bytes_acked += acked;
|
||||
if (tp->t_bytes_acked >= tp->snd_cwnd)
|
||||
tp->t_bytes_acked -= cw;
|
||||
else
|
||||
incr = 0;
|
||||
}
|
||||
else
|
||||
incr = max((incr * incr / cw), 1);
|
||||
/*
|
||||
* In slow-start with ABC enabled and no RTO in sight?
|
||||
* (Must not use abc_l_var > 1 if slow starting after an
|
||||
* RTO. On RTO, snd_nxt = snd_una, so the snd_nxt ==
|
||||
* snd_max check is sufficient to handle this).
|
||||
*/
|
||||
} else if (V_tcp_do_rfc3465 &&
|
||||
tp->snd_nxt == tp->snd_max)
|
||||
incr = min(acked,
|
||||
V_tcp_abc_l_var * tp->t_maxseg);
|
||||
/* ABC is on by default, so (incr == 0) frequently. */
|
||||
if (incr > 0)
|
||||
tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale);
|
||||
}
|
||||
SOCKBUF_LOCK(&so->so_snd);
|
||||
if (acked > so->so_snd.sb_cc) {
|
||||
@ -2328,8 +2376,10 @@ process_ACK:
|
||||
tp->snd_recover = th->th_ack - 1;
|
||||
if ((V_tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) &&
|
||||
IN_FASTRECOVERY(tp) &&
|
||||
SEQ_GEQ(th->th_ack, tp->snd_recover))
|
||||
SEQ_GEQ(th->th_ack, tp->snd_recover)) {
|
||||
EXIT_FASTRECOVERY(tp);
|
||||
tp->t_bytes_acked = 0;
|
||||
}
|
||||
tp->snd_una = th->th_ack;
|
||||
if (tp->t_flags & TF_SACK_PERMIT) {
|
||||
if (SEQ_GT(tp->snd_una, tp->snd_recover))
|
||||
|
@ -316,6 +316,8 @@ tcp_init(void)
|
||||
V_tcp_do_autorcvbuf = 1;
|
||||
V_tcp_autorcvbuf_inc = 16*1024;
|
||||
V_tcp_autorcvbuf_max = 256*1024;
|
||||
V_tcp_do_rfc3465 = 1;
|
||||
V_tcp_abc_l_var = 2;
|
||||
|
||||
V_tcp_mssdflt = TCP_MSS;
|
||||
#ifdef INET6
|
||||
|
@ -587,6 +587,7 @@ tcp_timer_rexmt(void * xtp)
|
||||
tp->t_dupacks = 0;
|
||||
}
|
||||
EXIT_FASTRECOVERY(tp);
|
||||
tp->t_bytes_acked = 0;
|
||||
(void) tcp_output(tp);
|
||||
|
||||
out:
|
||||
|
@ -189,6 +189,7 @@ struct tcpcb {
|
||||
void *t_pspare[3]; /* toe usrreqs / toepcb * / congestion algo / vimage / 1 general use */
|
||||
struct toe_usrreqs *t_tu; /* offload operations vector */
|
||||
void *t_toe; /* TOE pcb pointer */
|
||||
int t_bytes_acked; /* # bytes acked during current RTT */
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -127,6 +127,8 @@ struct vnet_inet {
|
||||
int _drop_synfin;
|
||||
int _tcp_do_rfc3042;
|
||||
int _tcp_do_rfc3390;
|
||||
int _tcp_do_rfc3465;
|
||||
int _tcp_abc_l_var;
|
||||
int _tcp_do_ecn;
|
||||
int _tcp_ecn_maxretries;
|
||||
int _tcp_insecure_rst;
|
||||
@ -291,6 +293,7 @@ extern struct vnet_inet vnet_inet_0;
|
||||
#define V_subnetsarelocal VNET_INET(subnetsarelocal)
|
||||
#define V_tcb VNET_INET(tcb)
|
||||
#define V_tcbinfo VNET_INET(tcbinfo)
|
||||
#define V_tcp_abc_l_var VNET_INET(tcp_abc_l_var)
|
||||
#define V_tcp_autorcvbuf_inc VNET_INET(tcp_autorcvbuf_inc)
|
||||
#define V_tcp_autorcvbuf_max VNET_INET(tcp_autorcvbuf_max)
|
||||
#define V_tcp_autosndbuf_inc VNET_INET(tcp_autosndbuf_inc)
|
||||
@ -303,6 +306,7 @@ extern struct vnet_inet vnet_inet_0;
|
||||
#define V_tcp_do_rfc1323 VNET_INET(tcp_do_rfc1323)
|
||||
#define V_tcp_do_rfc3042 VNET_INET(tcp_do_rfc3042)
|
||||
#define V_tcp_do_rfc3390 VNET_INET(tcp_do_rfc3390)
|
||||
#define V_tcp_do_rfc3465 VNET_INET(tcp_do_rfc3465)
|
||||
#define V_tcp_do_sack VNET_INET(tcp_do_sack)
|
||||
#define V_tcp_do_tso VNET_INET(tcp_do_tso)
|
||||
#define V_tcp_ecn_maxretries VNET_INET(tcp_ecn_maxretries)
|
||||
|
@ -57,7 +57,7 @@
|
||||
* is created, otherwise 1.
|
||||
*/
|
||||
#undef __FreeBSD_version
|
||||
#define __FreeBSD_version 800060 /* Master, propagated to newvers */
|
||||
#define __FreeBSD_version 800061 /* Master, propagated to newvers */
|
||||
|
||||
#ifndef LOCORE
|
||||
#include <sys/types.h>
|
||||
|
Loading…
x
Reference in New Issue
Block a user