tcp: Add a socket option to rack so we can test various changes to the slop value in timers.
Timer_slop, in TCP, has been 200ms for a long time. This value dates back to a time when delayed ack timers were longer and links were slower. A 200ms timer slop allows 1 MSS to be sent over a 60kbps link. It's possible that lowering this value to something more in line with today's delayed ack values (40ms) might improve TCP. This bit of code makes it so rack can, via a socket option, adjust the timer slop. Reviewed by: mtuexen Sponsored by: Netflix Inc Differential Revision: https://reviews.freebsd.org/D30249
This commit is contained in:
parent
38e7025a60
commit
4f3addd94b
@ -306,6 +306,7 @@ struct tcphdr {
|
|||||||
#define TCP_FAST_RSM_HACK 1137 /* Do we do the broken thing where we don't twiddle the TLP bits properly in fast_rsm_output? */
|
#define TCP_FAST_RSM_HACK 1137 /* Do we do the broken thing where we don't twiddle the TLP bits properly in fast_rsm_output? */
|
||||||
#define TCP_RACK_PACING_BETA 1138 /* Changing the beta for pacing */
|
#define TCP_RACK_PACING_BETA 1138 /* Changing the beta for pacing */
|
||||||
#define TCP_RACK_PACING_BETA_ECN 1139 /* Changing the beta for ecn with pacing */
|
#define TCP_RACK_PACING_BETA_ECN 1139 /* Changing the beta for ecn with pacing */
|
||||||
|
#define TCP_RACK_TIMER_SLOP 1140 /* Set or get the timer slop used */
|
||||||
|
|
||||||
/* Start of reserved space for third-party user-settable options. */
|
/* Start of reserved space for third-party user-settable options. */
|
||||||
#define TCP_VENDOR SO_VENDOR
|
#define TCP_VENDOR SO_VENDOR
|
||||||
|
@ -408,8 +408,8 @@ counter_u64_t rack_opts_arry[RACK_OPTS_SIZE];
|
|||||||
|
|
||||||
#define RACK_REXMTVAL(tp) max(rack_rto_min, ((tp)->t_srtt + ((tp)->t_rttvar << 2)))
|
#define RACK_REXMTVAL(tp) max(rack_rto_min, ((tp)->t_srtt + ((tp)->t_rttvar << 2)))
|
||||||
|
|
||||||
#define RACK_TCPT_RANGESET(tv, value, tvmin, tvmax) do { \
|
#define RACK_TCPT_RANGESET(tv, value, tvmin, tvmax, slop) do { \
|
||||||
(tv) = (value) + TICKS_2_USEC(tcp_rexmit_slop); \
|
(tv) = (value) + slop; \
|
||||||
if ((u_long)(tv) < (u_long)(tvmin)) \
|
if ((u_long)(tv) < (u_long)(tvmin)) \
|
||||||
(tv) = (tvmin); \
|
(tv) = (tvmin); \
|
||||||
if ((u_long)(tv) > (u_long)(tvmax)) \
|
if ((u_long)(tv) > (u_long)(tvmax)) \
|
||||||
@ -2448,7 +2448,7 @@ rack_log_rtt_sample(struct tcp_rack *rack, uint32_t rtt)
|
|||||||
/* Lets capture all the things that make up t_rtxcur */
|
/* Lets capture all the things that make up t_rtxcur */
|
||||||
log.u_bbr.applimited = rack_rto_min;
|
log.u_bbr.applimited = rack_rto_min;
|
||||||
log.u_bbr.epoch = rack_rto_max;
|
log.u_bbr.epoch = rack_rto_max;
|
||||||
log.u_bbr.lt_epoch = rtt;
|
log.u_bbr.lt_epoch = rack->r_ctl.timer_slop;
|
||||||
log.u_bbr.lost = rack_rto_min;
|
log.u_bbr.lost = rack_rto_min;
|
||||||
log.u_bbr.pkt_epoch = TICKS_2_USEC(tcp_rexmit_slop);
|
log.u_bbr.pkt_epoch = TICKS_2_USEC(tcp_rexmit_slop);
|
||||||
log.u_bbr.rttProp = RACK_REXMTVAL(rack->rc_tp);
|
log.u_bbr.rttProp = RACK_REXMTVAL(rack->rc_tp);
|
||||||
@ -5260,7 +5260,7 @@ rack_get_persists_timer_val(struct tcpcb *tp, struct tcp_rack *rack)
|
|||||||
|
|
||||||
t = (tp->t_srtt + (tp->t_rttvar << 2));
|
t = (tp->t_srtt + (tp->t_rttvar << 2));
|
||||||
RACK_TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift],
|
RACK_TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift],
|
||||||
rack_persist_min, rack_persist_max);
|
rack_persist_min, rack_persist_max, rack->r_ctl.timer_slop);
|
||||||
if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
|
if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
|
||||||
tp->t_rxtshift++;
|
tp->t_rxtshift++;
|
||||||
rack->r_ctl.rc_hpts_flags |= PACE_TMR_PERSIT;
|
rack->r_ctl.rc_hpts_flags |= PACE_TMR_PERSIT;
|
||||||
@ -5526,7 +5526,7 @@ rack_enter_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
|
|||||||
rack_timer_cancel(tp, rack, cts, __LINE__);
|
rack_timer_cancel(tp, rack, cts, __LINE__);
|
||||||
tp->t_rxtshift = 0;
|
tp->t_rxtshift = 0;
|
||||||
RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
|
RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
|
||||||
rack_rto_min, rack_rto_max);
|
rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
|
||||||
rack->rc_in_persist = 1;
|
rack->rc_in_persist = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -5581,7 +5581,7 @@ rack_exit_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
|
|||||||
rack->r_ctl.rc_went_idle_time = 0;
|
rack->r_ctl.rc_went_idle_time = 0;
|
||||||
tp->t_rxtshift = 0;
|
tp->t_rxtshift = 0;
|
||||||
RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
|
RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
|
||||||
rack_rto_min, rack_rto_max);
|
rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
|
||||||
rack->r_ctl.rc_agg_delayed = 0;
|
rack->r_ctl.rc_agg_delayed = 0;
|
||||||
rack->r_early = 0;
|
rack->r_early = 0;
|
||||||
rack->r_late = 0;
|
rack->r_late = 0;
|
||||||
@ -6777,7 +6777,7 @@ rack_timeout_rxt(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
|
|||||||
rexmt = max(rack_rto_min, (tp->t_srtt + (tp->t_rttvar << 2))) * tcp_backoff[tp->t_rxtshift];
|
rexmt = max(rack_rto_min, (tp->t_srtt + (tp->t_rttvar << 2))) * tcp_backoff[tp->t_rxtshift];
|
||||||
|
|
||||||
RACK_TCPT_RANGESET(tp->t_rxtcur, rexmt,
|
RACK_TCPT_RANGESET(tp->t_rxtcur, rexmt,
|
||||||
max(rack_rto_min, rexmt), rack_rto_max);
|
max(rack_rto_min, rexmt), rack_rto_max, rack->r_ctl.timer_slop);
|
||||||
/*
|
/*
|
||||||
* We enter the path for PLMTUD if connection is established or, if
|
* We enter the path for PLMTUD if connection is established or, if
|
||||||
* connection is FIN_WAIT_1 status, reason for the last is that if
|
* connection is FIN_WAIT_1 status, reason for the last is that if
|
||||||
@ -7702,7 +7702,7 @@ tcp_rack_xmit_timer_commit(struct tcp_rack *rack, struct tcpcb *tp)
|
|||||||
*/
|
*/
|
||||||
tp->t_rxtshift = 0;
|
tp->t_rxtshift = 0;
|
||||||
RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
|
RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
|
||||||
max(rack_rto_min, rtt + 2), rack_rto_max);
|
max(rack_rto_min, rtt + 2), rack_rto_max, rack->r_ctl.timer_slop);
|
||||||
rack_log_rtt_sample(rack, rtt);
|
rack_log_rtt_sample(rack, rtt);
|
||||||
tp->t_softerror = 0;
|
tp->t_softerror = 0;
|
||||||
}
|
}
|
||||||
@ -7877,7 +7877,7 @@ rack_update_rtt(struct tcpcb *tp, struct tcp_rack *rack,
|
|||||||
*/
|
*/
|
||||||
tp->t_rxtshift = 0;
|
tp->t_rxtshift = 0;
|
||||||
RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
|
RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
|
||||||
rack_rto_min, rack_rto_max);
|
rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
|
||||||
tp->t_softerror = 0;
|
tp->t_softerror = 0;
|
||||||
if (to && (to->to_flags & TOF_TS) &&
|
if (to && (to->to_flags & TOF_TS) &&
|
||||||
(ack_type == CUM_ACKED) &&
|
(ack_type == CUM_ACKED) &&
|
||||||
@ -9735,7 +9735,7 @@ rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
|||||||
if (rack->rc_in_persist) {
|
if (rack->rc_in_persist) {
|
||||||
tp->t_rxtshift = 0;
|
tp->t_rxtshift = 0;
|
||||||
RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
|
RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
|
||||||
rack_rto_min, rack_rto_max);
|
rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
|
||||||
}
|
}
|
||||||
if ((th->th_ack == tp->snd_una) && (tiwin == tp->snd_wnd)) {
|
if ((th->th_ack == tp->snd_una) && (tiwin == tp->snd_wnd)) {
|
||||||
rack_strike_dupack(rack);
|
rack_strike_dupack(rack);
|
||||||
@ -9798,7 +9798,7 @@ rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
|||||||
/* assure we are not backed off */
|
/* assure we are not backed off */
|
||||||
tp->t_rxtshift = 0;
|
tp->t_rxtshift = 0;
|
||||||
RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
|
RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
|
||||||
rack_rto_min, rack_rto_max);
|
rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
|
||||||
rack->rc_tlp_in_progress = 0;
|
rack->rc_tlp_in_progress = 0;
|
||||||
rack->r_ctl.rc_tlp_cnt_out = 0;
|
rack->r_ctl.rc_tlp_cnt_out = 0;
|
||||||
/*
|
/*
|
||||||
@ -10648,7 +10648,7 @@ rack_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
|||||||
m_freem(mfree);
|
m_freem(mfree);
|
||||||
tp->t_rxtshift = 0;
|
tp->t_rxtshift = 0;
|
||||||
RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
|
RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
|
||||||
rack_rto_min, rack_rto_max);
|
rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
|
||||||
rack->rc_tlp_in_progress = 0;
|
rack->rc_tlp_in_progress = 0;
|
||||||
rack->r_ctl.rc_tlp_cnt_out = 0;
|
rack->r_ctl.rc_tlp_cnt_out = 0;
|
||||||
/*
|
/*
|
||||||
@ -12089,6 +12089,7 @@ rack_init(struct tcpcb *tp)
|
|||||||
rack->r_ctl.rc_lowest_us_rtt = 0xffffffff;
|
rack->r_ctl.rc_lowest_us_rtt = 0xffffffff;
|
||||||
rack->r_ctl.rc_highest_us_rtt = 0;
|
rack->r_ctl.rc_highest_us_rtt = 0;
|
||||||
rack->r_ctl.bw_rate_cap = rack_bw_rate_cap;
|
rack->r_ctl.bw_rate_cap = rack_bw_rate_cap;
|
||||||
|
rack->r_ctl.timer_slop = TICKS_2_USEC(tcp_rexmit_slop);
|
||||||
if (rack_use_cmp_acks)
|
if (rack_use_cmp_acks)
|
||||||
rack->r_use_cmp_ack = 1;
|
rack->r_use_cmp_ack = 1;
|
||||||
if (rack_disable_prr)
|
if (rack_disable_prr)
|
||||||
@ -13182,7 +13183,7 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb
|
|||||||
/* Clear out shifts and such */
|
/* Clear out shifts and such */
|
||||||
tp->t_rxtshift = 0;
|
tp->t_rxtshift = 0;
|
||||||
RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
|
RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
|
||||||
rack_rto_min, rack_rto_max);
|
rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
|
||||||
rack->rc_tlp_in_progress = 0;
|
rack->rc_tlp_in_progress = 0;
|
||||||
rack->r_ctl.rc_tlp_cnt_out = 0;
|
rack->r_ctl.rc_tlp_cnt_out = 0;
|
||||||
/* Send recover and snd_nxt must be dragged along */
|
/* Send recover and snd_nxt must be dragged along */
|
||||||
@ -18879,6 +18880,19 @@ rack_process_option(struct tcpcb *tp, struct tcp_rack *rack, int sopt_name,
|
|||||||
rack->r_ctl.rc_saved_beta.beta = optval;
|
rack->r_ctl.rc_saved_beta.beta = optval;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case TCP_RACK_TIMER_SLOP:
|
||||||
|
RACK_OPTS_INC(tcp_rack_timer_slop);
|
||||||
|
rack->r_ctl.timer_slop = optval;
|
||||||
|
if (rack->rc_tp->t_srtt) {
|
||||||
|
/*
|
||||||
|
* If we have an SRTT lets update t_rxtcur
|
||||||
|
* to have the new slop.
|
||||||
|
*/
|
||||||
|
RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
|
||||||
|
rack_rto_min, rack_rto_max,
|
||||||
|
rack->r_ctl.timer_slop);
|
||||||
|
}
|
||||||
|
break;
|
||||||
case TCP_RACK_PACING_BETA_ECN:
|
case TCP_RACK_PACING_BETA_ECN:
|
||||||
RACK_OPTS_INC(tcp_rack_beta_ecn);
|
RACK_OPTS_INC(tcp_rack_beta_ecn);
|
||||||
if (strcmp(tp->cc_algo->name, CCALGONAME_NEWRENO) != 0) {
|
if (strcmp(tp->cc_algo->name, CCALGONAME_NEWRENO) != 0) {
|
||||||
@ -19525,6 +19539,7 @@ rack_set_sockopt(struct socket *so, struct sockopt *sopt,
|
|||||||
case TCP_DEFER_OPTIONS: /* URL:defer */
|
case TCP_DEFER_OPTIONS: /* URL:defer */
|
||||||
case TCP_RACK_PACING_BETA: /* URL:pacing_beta */
|
case TCP_RACK_PACING_BETA: /* URL:pacing_beta */
|
||||||
case TCP_RACK_PACING_BETA_ECN: /* URL:pacing_beta_ecn */
|
case TCP_RACK_PACING_BETA_ECN: /* URL:pacing_beta_ecn */
|
||||||
|
case TCP_RACK_TIMER_SLOP: /* URL:timer_slop */
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
/* Filter off all unknown options to the base stack */
|
/* Filter off all unknown options to the base stack */
|
||||||
@ -19856,6 +19871,9 @@ rack_get_sockopt(struct socket *so, struct sockopt *sopt,
|
|||||||
case TCP_SHARED_CWND_TIME_LIMIT:
|
case TCP_SHARED_CWND_TIME_LIMIT:
|
||||||
optval = rack->r_limit_scw;
|
optval = rack->r_limit_scw;
|
||||||
break;
|
break;
|
||||||
|
case TCP_RACK_TIMER_SLOP:
|
||||||
|
optval = rack->r_ctl.timer_slop;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
return (tcp_default_ctloutput(so, sopt, inp, tp));
|
return (tcp_default_ctloutput(so, sopt, inp, tp));
|
||||||
break;
|
break;
|
||||||
|
@ -245,6 +245,7 @@ struct rack_opts_stats {
|
|||||||
uint64_t tcp_rack_fastrsm_hack;
|
uint64_t tcp_rack_fastrsm_hack;
|
||||||
uint64_t tcp_rack_beta;
|
uint64_t tcp_rack_beta;
|
||||||
uint64_t tcp_rack_beta_ecn;
|
uint64_t tcp_rack_beta_ecn;
|
||||||
|
uint64_t tcp_rack_timer_slop;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* RTT shrink reasons */
|
/* RTT shrink reasons */
|
||||||
@ -341,6 +342,7 @@ struct rack_control {
|
|||||||
struct rack_sendmap *rc_resend; /* something we have been asked to
|
struct rack_sendmap *rc_resend; /* something we have been asked to
|
||||||
* resend */
|
* resend */
|
||||||
struct rack_fast_send_blk fsb; /* The fast-send block */
|
struct rack_fast_send_blk fsb; /* The fast-send block */
|
||||||
|
uint32_t timer_slop;
|
||||||
uint32_t input_pkt;
|
uint32_t input_pkt;
|
||||||
uint32_t saved_input_pkt;
|
uint32_t saved_input_pkt;
|
||||||
uint32_t rc_hpts_flags;
|
uint32_t rc_hpts_flags;
|
||||||
|
Loading…
Reference in New Issue
Block a user