- Estimate the amount of data in flight in sack recovery and use it
  to control the packets injected while in sack recovery (for both
  retransmissions and new data).
- Cleanups to the sack codepaths in tcp_output.c and tcp_sack.c.
- Add a new sysctl (net.inet.tcp.sack.initburst) that controls the
  number of sack retransmissions done upon initiation of sack recovery.

Submitted by: Mohan Srinivasan <mohans@yahoo-inc.com>
This commit is contained in:
parent
9536269a6d
commit
c8e4aa1cd5
@ -155,6 +155,12 @@ SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
|
||||
&tcp_reass_overflows, 0,
|
||||
"Global number of TCP Segment Reassembly Queue Overflows");
|
||||
|
||||
static int tcp_sack_recovery_initburst = 3;
|
||||
SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO,
|
||||
initburst, CTLFLAG_RW,
|
||||
&tcp_sack_recovery_initburst, 0,
|
||||
"Initial Number of Rexmits when sack recovery is set up");
|
||||
|
||||
struct inpcbhead tcb;
|
||||
#define tcb6 tcb /* for KAME src sync over BSD*'s */
|
||||
struct inpcbinfo tcbinfo;
|
||||
@ -1980,9 +1986,9 @@ trimthenstep6:
|
||||
tp->t_rtttime = 0;
|
||||
if (tp->sack_enable) {
|
||||
tcpstat.tcps_sack_recovery_episode++;
|
||||
tp->snd_cwnd =
|
||||
tp->t_maxseg *
|
||||
tp->t_dupacks;
|
||||
tp->sack_newdata = tp->snd_nxt;
|
||||
tp->snd_cwnd =
|
||||
tp->t_maxseg * tcp_sack_recovery_initburst;
|
||||
(void) tcp_output(tp);
|
||||
tp->snd_cwnd +=
|
||||
tp->snd_ssthresh;
|
||||
|
@ -124,6 +124,7 @@ tcp_output(struct tcpcb *tp)
|
||||
unsigned ipoptlen, optlen, hdrlen;
|
||||
int idle, sendalot;
|
||||
int i, sack_rxmit;
|
||||
int sack_bytes_rxmt;
|
||||
struct sackhole *p;
|
||||
#if 0
|
||||
int maxburst = TCP_MAXBURST;
|
||||
@ -198,12 +199,16 @@ again:
|
||||
* Still in sack recovery , reset rxmit flag to zero.
|
||||
*/
|
||||
sack_rxmit = 0;
|
||||
sack_bytes_rxmt = 0;
|
||||
len = 0;
|
||||
p = NULL;
|
||||
if (tp->sack_enable && IN_FASTRECOVERY(tp) &&
|
||||
(p = tcp_sack_output(tp))) {
|
||||
KASSERT(tp->snd_cwnd >= 0,
|
||||
("%s: CWIN is negative : %ld", __func__, tp->snd_cwnd));
|
||||
(p = tcp_sack_output(tp, &sack_bytes_rxmt))) {
|
||||
long cwin;
|
||||
|
||||
cwin = min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt;
|
||||
if (cwin < 0)
|
||||
cwin = 0;
|
||||
/* Do not retransmit SACK segments beyond snd_recover */
|
||||
if (SEQ_GT(p->end, tp->snd_recover)) {
|
||||
/*
|
||||
@ -222,10 +227,10 @@ again:
|
||||
goto after_sack_rexmit;
|
||||
} else
|
||||
/* Can rexmit part of the current hole */
|
||||
len = ((long)ulmin(tp->snd_cwnd,
|
||||
tp->snd_recover - p->rxmit));
|
||||
len = ((long)ulmin(cwin,
|
||||
tp->snd_recover - p->rxmit));
|
||||
} else
|
||||
len = ((long)ulmin(tp->snd_cwnd, p->end - p->rxmit));
|
||||
len = ((long)ulmin(cwin, p->end - p->rxmit));
|
||||
sack_rxmit = 1;
|
||||
sendalot = 1;
|
||||
off = p->rxmit - tp->snd_una;
|
||||
@ -295,8 +300,25 @@ after_sack_rexmit:
|
||||
* If sack_rxmit is true we are retransmitting from the scoreboard
|
||||
* in which case len is already set.
|
||||
*/
|
||||
if (!sack_rxmit)
|
||||
len = ((long)ulmin(so->so_snd.sb_cc, sendwin) - off);
|
||||
if (sack_rxmit == 0) {
|
||||
if (sack_bytes_rxmt == 0)
|
||||
len = ((long)ulmin(so->so_snd.sb_cc, sendwin) - off);
|
||||
else {
|
||||
long cwin;
|
||||
|
||||
/*
|
||||
* We are inside of a SACK recovery episode and are
|
||||
* sending new data, having retransmitted all the
|
||||
* data possible in the scoreboard.
|
||||
*/
|
||||
len = so->so_snd.sb_cc - off;
|
||||
cwin = sendwin - (tp->snd_nxt - tp->sack_newdata) -
|
||||
sack_bytes_rxmt;
|
||||
if (cwin < 0)
|
||||
cwin = 0;
|
||||
len = lmin(len, cwin);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Lop off SYN bit if it has already been sent. However, if this
|
||||
@ -850,12 +872,13 @@ send:
|
||||
* case, since we know we aren't doing a retransmission.
|
||||
* (retransmit and persist are mutually exclusive...)
|
||||
*/
|
||||
if (len || (flags & (TH_SYN|TH_FIN))
|
||||
|| callout_active(tp->tt_persist))
|
||||
th->th_seq = htonl(tp->snd_nxt);
|
||||
else
|
||||
th->th_seq = htonl(tp->snd_max);
|
||||
if (sack_rxmit) {
|
||||
if (sack_rxmit == 0) {
|
||||
if (len || (flags & (TH_SYN|TH_FIN))
|
||||
|| callout_active(tp->tt_persist))
|
||||
th->th_seq = htonl(tp->snd_nxt);
|
||||
else
|
||||
th->th_seq = htonl(tp->snd_max);
|
||||
} else {
|
||||
th->th_seq = htonl(p->rxmit);
|
||||
p->rxmit += len;
|
||||
}
|
||||
@ -956,7 +979,7 @@ send:
|
||||
tp->t_flags |= TF_SENTFIN;
|
||||
}
|
||||
}
|
||||
if (tp->sack_enable && sack_rxmit)
|
||||
if (sack_rxmit)
|
||||
goto timer;
|
||||
tp->snd_nxt += len;
|
||||
if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
|
||||
@ -981,18 +1004,9 @@ send:
|
||||
* of retransmit time.
|
||||
*/
|
||||
timer:
|
||||
if (tp->sack_enable && sack_rxmit &&
|
||||
!callout_active(tp->tt_rexmt) &&
|
||||
tp->snd_nxt != tp->snd_max) {
|
||||
callout_reset(tp->tt_rexmt, tp->t_rxtcur,
|
||||
tcp_timer_rexmt, tp);
|
||||
if (callout_active(tp->tt_persist)) {
|
||||
callout_stop(tp->tt_persist);
|
||||
tp->t_rxtshift = 0;
|
||||
}
|
||||
}
|
||||
if (!callout_active(tp->tt_rexmt) &&
|
||||
tp->snd_nxt != tp->snd_una) {
|
||||
((sack_rxmit && tp->snd_nxt != tp->snd_max) ||
|
||||
(tp->snd_nxt != tp->snd_una))) {
|
||||
if (callout_active(tp->tt_persist)) {
|
||||
callout_stop(tp->tt_persist);
|
||||
tp->t_rxtshift = 0;
|
||||
|
@ -155,6 +155,12 @@ SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
|
||||
&tcp_reass_overflows, 0,
|
||||
"Global number of TCP Segment Reassembly Queue Overflows");
|
||||
|
||||
static int tcp_sack_recovery_initburst = 3;
|
||||
SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO,
|
||||
initburst, CTLFLAG_RW,
|
||||
&tcp_sack_recovery_initburst, 0,
|
||||
"Initial Number of Rexmits when sack recovery is set up");
|
||||
|
||||
struct inpcbhead tcb;
|
||||
#define tcb6 tcb /* for KAME src sync over BSD*'s */
|
||||
struct inpcbinfo tcbinfo;
|
||||
@ -1980,9 +1986,9 @@ trimthenstep6:
|
||||
tp->t_rtttime = 0;
|
||||
if (tp->sack_enable) {
|
||||
tcpstat.tcps_sack_recovery_episode++;
|
||||
tp->snd_cwnd =
|
||||
tp->t_maxseg *
|
||||
tp->t_dupacks;
|
||||
tp->sack_newdata = tp->snd_nxt;
|
||||
tp->snd_cwnd =
|
||||
tp->t_maxseg * tcp_sack_recovery_initburst;
|
||||
(void) tcp_output(tp);
|
||||
tp->snd_cwnd +=
|
||||
tp->snd_ssthresh;
|
||||
|
@ -164,6 +164,11 @@ struct tcphdr tcp_savetcp;
|
||||
|
||||
extern struct uma_zone *sack_hole_zone;
|
||||
|
||||
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK");
|
||||
int tcp_do_sack = 1;
|
||||
SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_RW,
|
||||
&tcp_do_sack, 0, "Enable/Disable TCP SACK support");
|
||||
|
||||
/*
|
||||
* This function is called upon receipt of new valid data (while not in header
|
||||
* prediction mode), and it updates the ordered list of sacks.
|
||||
@ -486,18 +491,19 @@ tcp_sack_partialack(tp, th)
|
||||
{
|
||||
INP_LOCK_ASSERT(tp->t_inpcb);
|
||||
u_long ocwnd = tp->snd_cwnd;
|
||||
int sack_bytes_rexmt = 0;
|
||||
|
||||
callout_stop(tp->tt_rexmt);
|
||||
tp->t_rtttime = 0;
|
||||
/*
|
||||
* Set snd_cwnd to one segment beyond acknowledged offset
|
||||
* (tp->snd_una has not yet been updated when this function is called.)
|
||||
* Set cwnd so we can send one more segment (either rexmit based on
|
||||
* scoreboard or new segment). Set cwnd to the amount of data
|
||||
* rexmitted from scoreboard plus the amount of new data transmitted
|
||||
* in this sack recovery episode plus one segment.
|
||||
*/
|
||||
/*
|
||||
* Should really be
|
||||
* min(tp->snd_cwnd, tp->t_maxseg + (th->th_ack - tp->snd_una))
|
||||
*/
|
||||
tp->snd_cwnd = tp->t_maxseg + (th->th_ack - tp->snd_una);
|
||||
(void)tcp_sack_output(tp, &sack_bytes_rexmt);
|
||||
tp->snd_cwnd = sack_bytes_rexmt + (tp->snd_nxt - tp->sack_newdata) +
|
||||
tp->t_maxseg;
|
||||
tp->t_flags |= TF_ACKNOW;
|
||||
(void) tcp_output(tp);
|
||||
tp->snd_cwnd = ocwnd;
|
||||
@ -529,29 +535,29 @@ tcp_print_holes(struct tcpcb *tp)
|
||||
* NULL otherwise.
|
||||
*/
|
||||
struct sackhole *
|
||||
tcp_sack_output(struct tcpcb *tp)
|
||||
tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
|
||||
{
|
||||
struct sackhole *p;
|
||||
struct sackhole *p = NULL;
|
||||
|
||||
INP_LOCK_ASSERT(tp->t_inpcb);
|
||||
if (!tp->sack_enable)
|
||||
return (NULL);
|
||||
p = tp->snd_holes;
|
||||
while (p) {
|
||||
*sack_bytes_rexmt = 0;
|
||||
for (p = tp->snd_holes; p ; p = p->next) {
|
||||
if (SEQ_LT(p->rxmit, p->end)) {
|
||||
if (SEQ_LT(p->rxmit, tp->snd_una)) {/* old SACK hole */
|
||||
p = p->next;
|
||||
continue;
|
||||
}
|
||||
#ifdef TCP_SACK_DEBUG
|
||||
if (p)
|
||||
tcp_print_holes(tp);
|
||||
#endif
|
||||
return (p);
|
||||
*sack_bytes_rexmt += (p->rxmit - p->start);
|
||||
break;
|
||||
}
|
||||
p = p->next;
|
||||
*sack_bytes_rexmt += (p->rxmit - p->start);
|
||||
}
|
||||
return (NULL);
|
||||
return (p);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -588,4 +594,3 @@ tcp_sack_adjust(struct tcpcb *tp)
|
||||
tp->snd_nxt = tp->rcv_lastsack;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -206,11 +206,6 @@ static int tcp_inflight_stab = 20;
|
||||
SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, stab, CTLFLAG_RW,
|
||||
&tcp_inflight_stab, 0, "Inflight Algorithm Stabilization 20 = 2 packets");
|
||||
|
||||
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK");
|
||||
int tcp_do_sack = 1;
|
||||
SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_RW,
|
||||
&tcp_do_sack, 0, "Enable/Disable TCP SACK support");
|
||||
|
||||
uma_zone_t sack_hole_zone;
|
||||
|
||||
static struct inpcb *tcp_notify(struct inpcb *, int);
|
||||
|
@ -206,11 +206,6 @@ static int tcp_inflight_stab = 20;
|
||||
SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, stab, CTLFLAG_RW,
|
||||
&tcp_inflight_stab, 0, "Inflight Algorithm Stabilization 20 = 2 packets");
|
||||
|
||||
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK");
|
||||
int tcp_do_sack = 1;
|
||||
SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_RW,
|
||||
&tcp_do_sack, 0, "Enable/Disable TCP SACK support");
|
||||
|
||||
uma_zone_t sack_hole_zone;
|
||||
|
||||
static struct inpcb *tcp_notify(struct inpcb *, int);
|
||||
|
@ -200,6 +200,8 @@ struct tcpcb {
|
||||
tcp_seq rcv_lastsack; /* last seq number(+1) sack'd by rcv'r*/
|
||||
int rcv_numsacks; /* # distinct sack blks present */
|
||||
struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */
|
||||
tcp_seq sack_newdata; /* New data xmitted in this recovery
|
||||
episode starts at this seq number */
|
||||
};
|
||||
|
||||
#define IN_FASTRECOVERY(tp) (tp->t_flags & TF_FASTRECOVERY)
|
||||
@ -523,6 +525,7 @@ struct xtcpcb {
|
||||
#ifdef _KERNEL
|
||||
#ifdef SYSCTL_DECL
|
||||
SYSCTL_DECL(_net_inet_tcp);
|
||||
SYSCTL_DECL(_net_inet_tcp_sack);
|
||||
#endif
|
||||
|
||||
extern struct inpcbhead tcb; /* head of queue of active tcpcb's */
|
||||
@ -617,7 +620,7 @@ void tcp_update_sack_list(struct tcpcb *tp);
|
||||
void tcp_del_sackholes(struct tcpcb *, struct tcphdr *);
|
||||
void tcp_clean_sackreport(struct tcpcb *tp);
|
||||
void tcp_sack_adjust(struct tcpcb *tp);
|
||||
struct sackhole *tcp_sack_output(struct tcpcb *tp);
|
||||
struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
|
||||
void tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
|
||||
void tcp_free_sackholes(struct tcpcb *tp);
|
||||
int tcp_newreno(struct tcpcb *, struct tcphdr *);
|
||||
|
Loading…
x
Reference in New Issue
Block a user