Implement TCP NewReno, as documented in RFC 2582. This allows
better recovery for multiple packet losses in a single window.
The algorithm can be toggled via the sysctl net.inet.tcp.newreno,
which defaults to "on".

Submitted by:  Jayanth Vijayaraghavan <jayanth@yahoo-inc.com>
Jonathan Lemon 2000-05-06 03:31:09 +00:00
parent e003b0836e
commit 46f5848237
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=60067
6 changed files with 163 additions and 15 deletions
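
Since the whole mechanism hangs off net.inet.tcp.newreno, the toggle can be
inspected from userland through the standard sysctlbyname(3) interface. A
minimal read-only sketch (not part of the commit; error handling kept to the
essentials):

    #include <sys/types.h>
    #include <sys/sysctl.h>

    #include <stdio.h>

    int
    main(void)
    {
            int newreno;
            size_t len = sizeof(newreno);

            /* Read the current value of the toggle added by this commit. */
            if (sysctlbyname("net.inet.tcp.newreno", &newreno, &len,
                NULL, 0) == -1) {
                    perror("sysctlbyname");
                    return (1);
            }
            printf("net.inet.tcp.newreno = %d\n", newreno);
            return (0);
    }

Setting a new value works the same way, by passing the new integer through
the fourth and fifth arguments, given sufficient privilege.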

sys/netinet/tcp.h

@@ -118,6 +118,8 @@ struct tcphdr {
 #define	TCP_MAX_WINSHIFT	14	/* maximum window shift */
 #define	TCP_MAXBURST		4	/* maximum segments in a burst */
 #define	TCP_MAXHLEN	(0xf<<2)	/* max length of header in bytes */
+#define	TCP_MAXOLEN	(TCP_MAXHLEN - sizeof(struct tcphdr))
+					/* max space left for options */
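
For reference, the new macro works out to 40 bytes: the 4-bit data-offset
field caps the header at 0xf << 2 = 60 bytes, and a bare TCP header is 20
bytes on the wire. A quick standalone check (assumes the usual 20-byte
struct tcphdr; not part of the commit):

    #include <assert.h>

    int
    main(void)
    {
            int tcp_maxhlen = 0xf << 2;     /* 60: data offset counts 32-bit words */
            int tcp_hdrlen = 20;            /* wire size of a bare TCP header */
            int tcp_maxolen = tcp_maxhlen - tcp_hdrlen;

            assert(tcp_maxolen == 40);      /* max bytes left for TCP options */
            return (0);
    }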

sys/netinet/tcp_input.c

@@ -148,6 +148,7 @@ static void	 tcp_pulloutofband __P((struct socket *,
 static int	 tcp_reass __P((struct tcpcb *, struct tcphdr *, int *,
 		     struct mbuf *));
 static void	 tcp_xmit_timer __P((struct tcpcb *, int));
+static int	 tcp_newreno __P((struct tcpcb *, struct tcphdr *));
 
 /* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */
 #ifdef INET6
@@ -1104,6 +1105,7 @@ tcp_input(m, off0, proto)
 		tp->irs = th->th_seq;
 		tcp_sendseqinit(tp);
 		tcp_rcvseqinit(tp);
+		tp->snd_recover = tp->snd_una;
 		/*
 		 * Initialization of the tcpcb for transaction;
 		 *   set SND.WND = SEG.WND,
@@ -1780,10 +1782,20 @@ tcp_input(m, off0, proto)
 				u_int win =
 				    min(tp->snd_wnd, tp->snd_cwnd) / 2 /
 				    tp->t_maxseg;
+				if (tcp_do_newreno && SEQ_LT(th->th_ack,
+				    tp->snd_recover)) {
+					/* False retransmit, should not
+					 * cut window
+					 */
+					tp->snd_cwnd += tp->t_maxseg;
+					tp->t_dupacks = 0;
+					(void) tcp_output(tp);
+					goto drop;
+				}
 				if (win < 2)
 					win = 2;
 				tp->snd_ssthresh = win * tp->t_maxseg;
+				tp->snd_recover = tp->snd_max;
 				callout_stop(tp->tt_rexmt);
 				tp->t_rtttime = 0;
 				tp->snd_nxt = th->th_ack;
@@ -1807,10 +1819,26 @@ tcp_input(m, off0, proto)
 		 * If the congestion window was inflated to account
 		 * for the other side's cached packets, retract it.
 		 */
-		if (tp->t_dupacks >= tcprexmtthresh &&
-		    tp->snd_cwnd > tp->snd_ssthresh)
-			tp->snd_cwnd = tp->snd_ssthresh;
-		tp->t_dupacks = 0;
+		if (tcp_do_newreno == 0) {
+			if (tp->t_dupacks >= tcprexmtthresh &&
+			    tp->snd_cwnd > tp->snd_ssthresh)
+				tp->snd_cwnd = tp->snd_ssthresh;
+			tp->t_dupacks = 0;
+		} else if (tp->t_dupacks >= tcprexmtthresh &&
+		    !tcp_newreno(tp, th)) {
+			/*
+			 * Window inflation should have left us with approx.
+			 * snd_ssthresh outstanding data. But in case we
+			 * would be inclined to send a burst, better to do
+			 * it via the slow start mechanism.
+			 */
+			if (SEQ_GT(th->th_ack + tp->snd_ssthresh, tp->snd_max))
+				tp->snd_cwnd =
+				    tp->snd_max - th->th_ack + tp->t_maxseg;
+			else
+				tp->snd_cwnd = tp->snd_ssthresh;
+			tp->t_dupacks = 0;
+		}
 		if (SEQ_GT(th->th_ack, tp->snd_max)) {
 			tcpstat.tcps_rcvacktoomuch++;
 			goto dropafterack;
@@ -1903,7 +1931,8 @@ tcp_input(m, off0, proto)
 		if (cw > tp->snd_ssthresh)
 			incr = incr * incr / cw;
-		tp->snd_cwnd = min(cw + incr, TCP_MAXWIN << tp->snd_scale);
+		if (tcp_do_newreno == 0 || SEQ_GEQ(th->th_ack, tp->snd_recover))
+			tp->snd_cwnd = min(cw + incr,TCP_MAXWIN<<tp->snd_scale);
 		}
 		if (acked > so->so_snd.sb_cc) {
 			tp->snd_wnd -= so->so_snd.sb_cc;
@@ -2791,3 +2820,41 @@ tcp_mssopt(tp)
 	return rt->rt_ifp->if_mtu - min_protoh;
 }
+
+/*
+ * Checks for partial ack.  If partial ack arrives, force the retransmission
+ * of the next unacknowledged segment, do not clear tp->t_dupacks, and return
+ * 1.  By setting snd_nxt to ti_ack, this forces retransmission timer to
+ * be started again.  If the ack advances at least to tp->snd_recover, return 0.
+ */
+static int
+tcp_newreno(tp, th)
+	struct tcpcb *tp;
+	struct tcphdr *th;
+{
+	if (SEQ_LT(th->th_ack, tp->snd_recover)) {
+		tcp_seq onxt = tp->snd_nxt;
+		tcp_seq ouna = tp->snd_una;	/* Haven't updated snd_una yet */
+		u_long ocwnd = tp->snd_cwnd;
+
+		callout_stop(tp->tt_rexmt);
+		tp->t_rtttime = 0;
+		tp->snd_nxt = th->th_ack;
+		tp->snd_cwnd = tp->t_maxseg;
+		tp->snd_una = th->th_ack;
+		(void) tcp_output(tp);
+		tp->snd_cwnd = ocwnd;
+		tp->snd_una = ouna;
+		if (SEQ_GT(onxt, tp->snd_nxt))
+			tp->snd_nxt = onxt;
+		/*
+		 * Partial window deflation.  Relies on fact that tp->snd_una
+		 * not updated yet.
+		 */
+		tp->snd_cwnd -= (th->th_ack - tp->snd_una - tp->t_maxseg);
+		return (1);
+	}
+	return (0);
+}
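
The save/restore dance in tcp_newreno() is worth spelling out: to force
exactly one retransmission from the hole, the function temporarily shrinks
snd_cwnd to one MSS and rewinds snd_nxt and snd_una to the ACKed point,
calls tcp_output(), then puts everything back before deflating the window.
The standalone model below walks through that sequence with made-up numbers;
struct conn and send_window() are hypothetical stand-ins (send_window()
plays the role of tcp_output()), and the plain comparisons ignore sequence
wraparound, which the kernel handles with SEQ_LT/SEQ_GT:

    #include <stdio.h>

    struct conn {
            unsigned long snd_una, snd_nxt, snd_cwnd, t_maxseg;
    };

    /* "Send" whatever fits between snd_nxt and snd_una + cwnd. */
    static void
    send_window(struct conn *c)
    {
            while (c->snd_nxt < c->snd_una + c->snd_cwnd) {
                    printf("transmit seq %lu..%lu\n",
                        c->snd_nxt, c->snd_nxt + c->t_maxseg - 1);
                    c->snd_nxt += c->t_maxseg;
            }
    }

    int
    main(void)
    {
            struct conn c = { 1000, 9000, 8000, 1000 };
            unsigned long ack = 4000;       /* partial ACK: below recovery point */
            unsigned long onxt = c.snd_nxt, ouna = c.snd_una, ocwnd = c.snd_cwnd;

            /* Rewind to the hole with a one-MSS window: exactly one
             * segment (the first unacknowledged one) is retransmitted. */
            c.snd_nxt = ack;
            c.snd_una = ack;
            c.snd_cwnd = c.t_maxseg;
            send_window(&c);

            /* Restore state, then deflate as in the commit: the window
             * shrinks by the newly ACKed data, less the one MSS just
             * retransmitted (8000 - (4000 - 1000 - 1000) = 6000 here). */
            c.snd_cwnd = ocwnd;
            c.snd_una = ouna;
            if (onxt > c.snd_nxt)
                    c.snd_nxt = onxt;
            c.snd_cwnd -= (ack - c.snd_una - c.t_maxseg);
            printf("cwnd after deflation: %lu\n", c.snd_cwnd);
            return (0);
    }

The net effect is that roughly snd_ssthresh worth of data stays in flight
across each partial ACK instead of the window collapsing to one MSS.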

sys/netinet/tcp_output.c

@@ -98,6 +98,9 @@ int ss_fltsz_local = TCP_MAXWIN;	/* something large */
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, local_slowstart_flightsize, CTLFLAG_RW,
     &ss_fltsz_local, 1, "Slow start flight size for local networks");
 
+int	tcp_do_newreno = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW, &tcp_do_newreno,
+	0, "Enable NewReno Algorithms");
 
 /*
  * Tcp output routine: figure out what should be sent and send it.
@@ -118,6 +121,7 @@ tcp_output(tp)
 	u_char opt[TCP_MAXOLEN];
 	unsigned ipoptlen, optlen, hdrlen;
 	int idle, sendalot;
+	int maxburst = TCP_MAXBURST;
 	struct rmxp_tao *taop;
 	struct rmxp_tao tao_noncached;
 #ifdef INET6
@@ -778,12 +782,12 @@ tcp_output(tp)
 		 */
 		if (!callout_active(tp->tt_rexmt) &&
 		    tp->snd_nxt != tp->snd_una) {
-			callout_reset(tp->tt_rexmt, tp->t_rxtcur,
-				      tcp_timer_rexmt, tp);
 			if (callout_active(tp->tt_persist)) {
 				callout_stop(tp->tt_persist);
 				tp->t_rxtshift = 0;
 			}
+			callout_reset(tp->tt_rexmt, tp->t_rxtcur,
+				      tcp_timer_rexmt, tp);
 		}
 	} else
 		if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
@@ -889,7 +893,7 @@ tcp_output(tp)
 	tp->t_flags &= ~TF_ACKNOW;
 	if (tcp_delack_enabled)
 		callout_stop(tp->tt_delack);
-	if (sendalot)
+	if (sendalot && (!tcp_do_newreno || --maxburst))
 		goto again;
 	return (0);
 }
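
The maxburst change above caps how many segments one call to tcp_output()
can emit when NewReno is enabled: the predecrement runs the counter down
from TCP_MAXBURST, and the loop stops when it hits zero even if sendalot
is still set. A self-contained sketch of just that control flow (the
variables here are local stand-ins, not the kernel's):

    #include <stdio.h>

    #define TCP_MAXBURST 4          /* maximum segments in a burst */

    int
    main(void)
    {
            int maxburst = TCP_MAXBURST;
            int sendalot = 1;       /* pretend there is always more to send */
            int tcp_do_newreno = 1;
            int sent = 0;

    again:
            sent++;                 /* one segment per pass, as in tcp_output() */
            if (sendalot && (!tcp_do_newreno || --maxburst))
                    goto again;
            printf("segments sent in one call: %d\n", sent);  /* prints 4 */
            return (0);
    }

With tcp_do_newreno set to 0 the short-circuit skips the decrement and the
old unbounded behavior is preserved.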

sys/netinet/tcp_reass.c

@@ -148,6 +148,7 @@ static void	 tcp_pulloutofband __P((struct socket *,
 static int	 tcp_reass __P((struct tcpcb *, struct tcphdr *, int *,
 		     struct mbuf *));
 static void	 tcp_xmit_timer __P((struct tcpcb *, int));
+static int	 tcp_newreno __P((struct tcpcb *, struct tcphdr *));
 
 /* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */
 #ifdef INET6
@@ -1104,6 +1105,7 @@ tcp_input(m, off0, proto)
 		tp->irs = th->th_seq;
 		tcp_sendseqinit(tp);
 		tcp_rcvseqinit(tp);
+		tp->snd_recover = tp->snd_una;
 		/*
 		 * Initialization of the tcpcb for transaction;
 		 *   set SND.WND = SEG.WND,
@@ -1780,10 +1782,20 @@ tcp_input(m, off0, proto)
 				u_int win =
 				    min(tp->snd_wnd, tp->snd_cwnd) / 2 /
 				    tp->t_maxseg;
+				if (tcp_do_newreno && SEQ_LT(th->th_ack,
+				    tp->snd_recover)) {
+					/* False retransmit, should not
+					 * cut window
+					 */
+					tp->snd_cwnd += tp->t_maxseg;
+					tp->t_dupacks = 0;
+					(void) tcp_output(tp);
+					goto drop;
+				}
 				if (win < 2)
 					win = 2;
 				tp->snd_ssthresh = win * tp->t_maxseg;
+				tp->snd_recover = tp->snd_max;
 				callout_stop(tp->tt_rexmt);
 				tp->t_rtttime = 0;
 				tp->snd_nxt = th->th_ack;
@@ -1807,10 +1819,26 @@ tcp_input(m, off0, proto)
 		 * If the congestion window was inflated to account
 		 * for the other side's cached packets, retract it.
 		 */
-		if (tp->t_dupacks >= tcprexmtthresh &&
-		    tp->snd_cwnd > tp->snd_ssthresh)
-			tp->snd_cwnd = tp->snd_ssthresh;
-		tp->t_dupacks = 0;
+		if (tcp_do_newreno == 0) {
+			if (tp->t_dupacks >= tcprexmtthresh &&
+			    tp->snd_cwnd > tp->snd_ssthresh)
+				tp->snd_cwnd = tp->snd_ssthresh;
+			tp->t_dupacks = 0;
+		} else if (tp->t_dupacks >= tcprexmtthresh &&
+		    !tcp_newreno(tp, th)) {
+			/*
+			 * Window inflation should have left us with approx.
+			 * snd_ssthresh outstanding data. But in case we
+			 * would be inclined to send a burst, better to do
+			 * it via the slow start mechanism.
+			 */
+			if (SEQ_GT(th->th_ack + tp->snd_ssthresh, tp->snd_max))
+				tp->snd_cwnd =
+				    tp->snd_max - th->th_ack + tp->t_maxseg;
+			else
+				tp->snd_cwnd = tp->snd_ssthresh;
+			tp->t_dupacks = 0;
+		}
 		if (SEQ_GT(th->th_ack, tp->snd_max)) {
 			tcpstat.tcps_rcvacktoomuch++;
 			goto dropafterack;
@@ -1903,7 +1931,8 @@ tcp_input(m, off0, proto)
 		if (cw > tp->snd_ssthresh)
 			incr = incr * incr / cw;
-		tp->snd_cwnd = min(cw + incr, TCP_MAXWIN << tp->snd_scale);
+		if (tcp_do_newreno == 0 || SEQ_GEQ(th->th_ack, tp->snd_recover))
+			tp->snd_cwnd = min(cw + incr,TCP_MAXWIN<<tp->snd_scale);
 		}
 		if (acked > so->so_snd.sb_cc) {
 			tp->snd_wnd -= so->so_snd.sb_cc;
@@ -2791,3 +2820,41 @@ tcp_mssopt(tp)
 	return rt->rt_ifp->if_mtu - min_protoh;
 }
+
+/*
+ * Checks for partial ack.  If partial ack arrives, force the retransmission
+ * of the next unacknowledged segment, do not clear tp->t_dupacks, and return
+ * 1.  By setting snd_nxt to ti_ack, this forces retransmission timer to
+ * be started again.  If the ack advances at least to tp->snd_recover, return 0.
+ */
+static int
+tcp_newreno(tp, th)
+	struct tcpcb *tp;
+	struct tcphdr *th;
+{
+	if (SEQ_LT(th->th_ack, tp->snd_recover)) {
+		tcp_seq onxt = tp->snd_nxt;
+		tcp_seq ouna = tp->snd_una;	/* Haven't updated snd_una yet */
+		u_long ocwnd = tp->snd_cwnd;
+
+		callout_stop(tp->tt_rexmt);
+		tp->t_rtttime = 0;
+		tp->snd_nxt = th->th_ack;
+		tp->snd_cwnd = tp->t_maxseg;
+		tp->snd_una = th->th_ack;
+		(void) tcp_output(tp);
+		tp->snd_cwnd = ocwnd;
+		tp->snd_una = ouna;
+		if (SEQ_GT(onxt, tp->snd_nxt))
+			tp->snd_nxt = onxt;
+		/*
+		 * Partial window deflation.  Relies on fact that tp->snd_una
+		 * not updated yet.
+		 */
+		tp->snd_cwnd -= (th->th_ack - tp->snd_una - tp->t_maxseg);
+		return (1);
+	}
+	return (0);
+}

sys/netinet/tcp_timer.c

@@ -415,6 +415,11 @@ tcp_timer_rexmt(xtp)
 		tp->t_srtt = 0;
 	}
 	tp->snd_nxt = tp->snd_una;
+	/*
+	 * Note:  We overload snd_recover to function also as the
+	 * snd_last variable described in RFC 2582
+	 */
+	tp->snd_recover = tp->snd_max;
 	/*
 	 * Force a segment to be sent.
 	 */

sys/netinet/tcp_var.h

@@ -119,6 +119,8 @@ struct tcpcb {
 						 * for slow start exponential to
 						 * linear switch
 						 */
+	tcp_seq	snd_recover;		/* for use in fast recovery */
+
 	u_int	t_maxopd;		/* mss plus options */
 	u_long	t_rcvtime;		/* inactivity time */
@@ -365,6 +367,7 @@ extern struct inpcbinfo tcbinfo;
 extern	struct tcpstat tcpstat;	/* tcp statistics */
 extern	int tcp_mssdflt;	/* XXX */
 extern	int tcp_delack_enabled;
+extern	int tcp_do_newreno;
 extern	int ss_fltsz;
 extern	int ss_fltsz_local;