This commit brings things into sync with the advancements that
have been made in rack and adds a few fixes in BBR. It also
removes any possibility of incorrectly doing OOB/urgent data;
the stacks do not support it. This should fix the syzkaller
crashes seen in the past. Still to be fixed is the BBR issue
reported this weekend involving the SYN and the sending of a
RST. Note that this version of rack can now do pacing as well.

Sponsored by: Netflix Inc.
Differential Revision: https://reviews.freebsd.org/D24576
Randall Stewart 2020-05-04 20:28:53 +00:00
parent d3b6c96b7d
commit 963fb2ad94
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=360639
6 changed files with 6081 additions and 1538 deletions
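Since only the kernel side appears in the diffs below, here is a minimal userspace sketch (not part of this commit) of what the OOB change means in practice: once a connection runs on the bbr (or rack) stack, an urgent-data send should fail cleanly with EOPNOTSUPP via the new tfb_pru_options hook instead of reaching code that cannot handle it. TCP_FUNCTION_BLK and struct tcp_function_set are the stock FreeBSD interfaces from <netinet/tcp.h>; the helper below is hypothetical.

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical check: confirm OOB sends are refused by the bbr stack. */
static void
check_oob_rejected(int fd)
{
	struct tcp_function_set fs;

	memset(&fs, 0, sizeof(fs));
	strlcpy(fs.function_set_name, "bbr", sizeof(fs.function_set_name));
	/* Move the connection onto the bbr stack (tcp_bbr.ko must be loaded). */
	if (setsockopt(fd, IPPROTO_TCP, TCP_FUNCTION_BLK, &fs, sizeof(fs)) == -1)
		perror("TCP_FUNCTION_BLK");
	/* The stack's pru_options hook now rejects PRUS_OOB. */
	if (send(fd, "x", 1, MSG_OOB) == -1 && errno == EOPNOTSUPP)
		printf("OOB send rejected as expected\n");
}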


@@ -1,7 +1,5 @@
/*-
* Copyright (c) 2016-9
* Netflix Inc.
* All rights reserved.
* Copyright (c) 2016-2020 Netflix, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -72,6 +70,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/tim_filter.h>
#include <sys/time.h>
#include <sys/protosw.h>
#include <vm/uma.h>
#include <sys/kern_prefetch.h>
@@ -1853,28 +1852,6 @@ bbr_init_sysctls(void)
&bbr_clear_lost, 0, sysctl_bbr_clear_lost, "IU", "Clear lost counters");
}
static inline int32_t
bbr_progress_timeout_check(struct tcp_bbr *bbr)
{
if (bbr->rc_tp->t_maxunacktime && bbr->rc_tp->t_acktime &&
TSTMP_GT(ticks, bbr->rc_tp->t_acktime)) {
if ((((uint32_t)ticks - bbr->rc_tp->t_acktime)) >= bbr->rc_tp->t_maxunacktime) {
/*
* There is an assumption here that the caller will
* drop the connection, so we increment the
* statistics.
*/
bbr_log_progress_event(bbr, bbr->rc_tp, ticks, PROGRESS_DROP, __LINE__);
BBR_STAT_INC(bbr_progress_drops);
#ifdef NETFLIX_STATS
KMOD_TCPSTAT_INC(tcps_progdrops);
#endif
return (1);
}
}
return (0);
}
static void
bbr_counter_destroy(void)
{
@@ -1884,6 +1861,8 @@ bbr_counter_destroy(void)
COUNTER_ARRAY_FREE(bbr_state_lost, BBR_MAX_STAT);
COUNTER_ARRAY_FREE(bbr_state_time, BBR_MAX_STAT);
COUNTER_ARRAY_FREE(bbr_state_resend, BBR_MAX_STAT);
counter_u64_free(bbr_nohdwr_pacing_enobuf);
counter_u64_free(bbr_hdwr_pacing_enobuf);
counter_u64_free(bbr_flows_whdwr_pacing);
counter_u64_free(bbr_flows_nohdwr_pacing);
@@ -4643,7 +4622,8 @@ bbr_timeout_tlp(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
/* Its not time yet */
return (0);
}
if (bbr_progress_timeout_check(bbr)) {
if (ctf_progress_timeout_check(tp, true)) {
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__);
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT);
return (1);
}
@@ -4815,9 +4795,8 @@ bbr_timeout_delack(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
}
/*
* Persists timer, here we simply need to setup the
* FORCE-DATA flag the output routine will send
* the one byte send.
* Here we send a KEEP-ALIVE like probe to the
* peer, we do not send data.
*
* We only return 1, saying don't proceed, if all timers
* are stopped (destroyed PCB?).
@@ -4845,7 +4824,8 @@ bbr_timeout_persist(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
/*
* Have we exceeded the user specified progress time?
*/
if (bbr_progress_timeout_check(bbr)) {
if (ctf_progress_timeout_check(tp, true)) {
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__);
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT);
goto out;
}
@@ -4859,6 +4839,7 @@ bbr_timeout_persist(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
(ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
KMOD_TCPSTAT_INC(tcps_persistdrop);
tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT);
goto out;
}
@@ -4875,6 +4856,7 @@ bbr_timeout_persist(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
if (tp->t_state > TCPS_CLOSE_WAIT &&
(ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
KMOD_TCPSTAT_INC(tcps_persistdrop);
tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT);
goto out;
}
@@ -4947,6 +4929,7 @@ bbr_timeout_keepalive(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
return (1);
dropit:
KMOD_TCPSTAT_INC(tcps_keepdrops);
tcp_log_end_status(tp, TCP_EI_STATUS_KEEP_MAX);
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT);
return (1);
}
@@ -5058,8 +5041,9 @@ bbr_timeout_rxt(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
* retransmit interval. Back off to a longer retransmit interval
* and retransmit one segment.
*/
if (bbr_progress_timeout_check(bbr)) {
if (ctf_progress_timeout_check(tp, true)) {
retval = 1;
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__);
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT);
goto out;
}
@@ -5078,6 +5062,7 @@ bbr_timeout_rxt(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
tp->t_rxtshift = TCP_MAXRXTSHIFT;
KMOD_TCPSTAT_INC(tcps_timeoutdrop);
retval = 1;
tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN);
tcp_set_inp_to_drop(bbr->rc_inp,
(tp->t_softerror ? (uint16_t) tp->t_softerror : ETIMEDOUT));
goto out;
@@ -8050,6 +8035,9 @@ bbr_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so,
* to reset him.
*/
*ret_val = 1;
tcp_log_end_status(tp, TCP_EI_STATUS_DATA_A_CLOSE);
/* tcp_close will kill the inp; pre-log the Reset */
tcp_log_end_status(tp, TCP_EI_STATUS_SERVER_RST);
tp = tcp_close(tp);
ctf_do_dropwithreset(m, tp, th, BANDLIM_UNLIMITED, tlen);
BBR_STAT_INC(bbr_dropped_af_data);
@@ -8132,7 +8120,6 @@ bbr_exit_persist(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts, int32_t li
idle_time = bbr_calc_time(cts, bbr->r_ctl.rc_went_idle_time);
bbr->rc_in_persist = 0;
bbr->rc_hit_state_1 = 0;
tp->t_flags &= ~TF_FORCEDATA;
bbr->r_ctl.rc_del_time = cts;
/*
* We invalidate the last ack here since we
@@ -8390,66 +8377,12 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
return (0);
}
/*
* Process segments with URG.
* We don't support urgent data but
* drag along the up just to make sure
* if there is a stack switch no one
* is surprised.
*/
if ((thflags & TH_URG) && th->th_urp &&
TCPS_HAVERCVDFIN(tp->t_state) == 0) {
/*
* This is a kludge, but if we receive and accept random
* urgent pointers, we'll crash in soreceive. It's hard to
* imagine someone actually wanting to send this much urgent
* data.
*/
SOCKBUF_LOCK(&so->so_rcv);
if (th->th_urp + sbavail(&so->so_rcv) > sb_max) {
th->th_urp = 0; /* XXX */
thflags &= ~TH_URG; /* XXX */
SOCKBUF_UNLOCK(&so->so_rcv); /* XXX */
goto dodata; /* XXX */
}
/*
* If this segment advances the known urgent pointer, then
* mark the data stream. This should not happen in
* CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since a
* FIN has been received from the remote side. In these
* states we ignore the URG.
*
* According to RFC961 (Assigned Protocols), the urgent
* pointer points to the last octet of urgent data. We
* continue, however, to consider it to indicate the first
* octet of data past the urgent section as the original
* spec states (in one of two places).
*/
if (SEQ_GT(th->th_seq + th->th_urp, tp->rcv_up)) {
tp->rcv_up = th->th_seq + th->th_urp;
so->so_oobmark = sbavail(&so->so_rcv) +
(tp->rcv_up - tp->rcv_nxt) - 1;
if (so->so_oobmark == 0)
so->so_rcv.sb_state |= SBS_RCVATMARK;
sohasoutofband(so);
tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
}
SOCKBUF_UNLOCK(&so->so_rcv);
/*
* Remove out of band data so doesn't get presented to user.
* This can happen independent of advancing the URG pointer,
* but if two URG's are pending at once, some out-of-band
* data may creep in... ick.
*/
if (th->th_urp <= (uint32_t)tlen &&
!(so->so_options & SO_OOBINLINE)) {
/* hdr drop is delayed */
tcp_pulloutofband(so, th, m, drop_hdrlen);
}
} else {
/*
* If no out of band data is expected, pull receive urgent
* pointer along with the receive window.
*/
if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
tp->rcv_up = tp->rcv_nxt;
}
dodata: /* XXX */
tp->rcv_up = tp->rcv_nxt;
INP_WLOCK_ASSERT(tp->t_inpcb);
/*
@@ -8792,7 +8725,7 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
static int
bbr_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
uint32_t tiwin, int32_t nxt_pkt)
uint32_t tiwin, int32_t nxt_pkt, uint8_t iptos)
{
int32_t acked;
uint16_t nsegs;
@@ -8987,7 +8920,7 @@ bbr_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
static int
bbr_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt)
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos)
{
int32_t todrop;
int32_t ourfinisacked = 0;
@@ -9010,6 +8943,7 @@ bbr_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so,
if ((thflags & TH_ACK) &&
(SEQ_LEQ(th->th_ack, tp->iss) ||
SEQ_GT(th->th_ack, tp->snd_max))) {
tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT);
ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen);
return (1);
}
@@ -9196,7 +9130,7 @@ bbr_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so,
static int
bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt)
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos)
{
int32_t ourfinisacked = 0;
int32_t ret_val;
@@ -9207,6 +9141,7 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so,
if ((thflags & TH_ACK) &&
(SEQ_LEQ(th->th_ack, tp->snd_una) ||
SEQ_GT(th->th_ack, tp->snd_max))) {
tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT);
ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen);
return (1);
}
@@ -9218,6 +9153,7 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so,
* data), a valid ACK, a FIN, or a RST.
*/
if ((thflags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) {
tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT);
ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen);
return (1);
} else if (thflags & TH_SYN) {
@@ -9253,6 +9189,7 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so,
* "LAND" DoS attack.
*/
if (SEQ_LT(th->th_seq, tp->irs)) {
tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT);
ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen);
return (1);
}
@@ -9405,7 +9342,7 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so,
static int
bbr_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt)
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos)
{
struct tcp_bbr *bbr;
int32_t ret_val;
@@ -9439,7 +9376,7 @@ bbr_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so,
__predict_true(th->th_seq == tp->rcv_nxt)) {
if (tlen == 0) {
if (bbr_fastack(m, th, so, tp, to, drop_hdrlen, tlen,
tiwin, nxt_pkt)) {
tiwin, nxt_pkt, iptos)) {
return (0);
}
} else {
@@ -9521,7 +9458,8 @@ bbr_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so,
return (ret_val);
}
if (sbavail(&so->so_snd)) {
if (bbr_progress_timeout_check(bbr)) {
if (ctf_progress_timeout_check(tp, true)) {
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__);
ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen);
return (1);
}
@@ -9539,7 +9477,7 @@ bbr_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so,
static int
bbr_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt)
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos)
{
struct tcp_bbr *bbr;
int32_t ret_val;
@@ -9616,7 +9554,8 @@ bbr_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so,
return (ret_val);
}
if (sbavail(&so->so_snd)) {
if (bbr_progress_timeout_check(bbr)) {
if (ctf_progress_timeout_check(tp, true)) {
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__);
ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen);
return (1);
}
@@ -9632,6 +9571,9 @@ bbr_check_data_after_close(struct mbuf *m, struct tcp_bbr *bbr,
if (bbr->rc_allow_data_af_clo == 0) {
close_now:
tcp_log_end_status(tp, TCP_EI_STATUS_DATA_A_CLOSE);
/* tcp_close will kill the inp; pre-log the Reset */
tcp_log_end_status(tp, TCP_EI_STATUS_SERVER_RST);
tp = tcp_close(tp);
KMOD_TCPSTAT_INC(tcps_rcvafterclose);
ctf_do_dropwithreset(m, tp, th, BANDLIM_UNLIMITED, (*tlen));
@@ -9655,7 +9597,7 @@ bbr_check_data_after_close(struct mbuf *m, struct tcp_bbr *bbr,
static int
bbr_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt)
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos)
{
int32_t ourfinisacked = 0;
int32_t ret_val;
@@ -9764,7 +9706,8 @@ bbr_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so,
tcp_state_change(tp, TCPS_FIN_WAIT_2);
}
if (sbavail(&so->so_snd)) {
if (bbr_progress_timeout_check(bbr)) {
if (ctf_progress_timeout_check(tp, true)) {
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__);
ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen);
return (1);
}
@@ -9781,7 +9724,7 @@ bbr_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so,
static int
bbr_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt)
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos)
{
int32_t ourfinisacked = 0;
int32_t ret_val;
@@ -9876,7 +9819,8 @@ bbr_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so,
return (1);
}
if (sbavail(&so->so_snd)) {
if (bbr_progress_timeout_check(bbr)) {
if (ctf_progress_timeout_check(tp, true)) {
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__);
ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen);
return (1);
}
@@ -9893,7 +9837,7 @@ bbr_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so,
static int
bbr_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt)
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos)
{
int32_t ourfinisacked = 0;
int32_t ret_val;
@@ -9988,7 +9932,8 @@ bbr_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
return (1);
}
if (sbavail(&so->so_snd)) {
if (bbr_progress_timeout_check(bbr)) {
if (ctf_progress_timeout_check(tp, true)) {
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__);
ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen);
return (1);
}
@@ -10006,7 +9951,7 @@ bbr_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
static int
bbr_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt)
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos)
{
int32_t ourfinisacked = 0;
int32_t ret_val;
@@ -10104,7 +10049,8 @@ bbr_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so,
return (ret_val);
}
if (sbavail(&so->so_snd)) {
if (bbr_progress_timeout_check(bbr)) {
if (ctf_progress_timeout_check(tp, true)) {
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__);
ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen);
return (1);
}
@@ -11702,6 +11648,8 @@ bbr_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
* always. All other times (timers etc) we must have a rack-state
* set (so we assure we have done the checks above for SACK).
*/
if (thflags & TH_FIN)
tcp_log_end_status(tp, TCP_EI_STATUS_CLIENT_FIN);
if (bbr->r_state != tp->t_state)
bbr_set_state(tp, bbr, tiwin);
@@ -11740,6 +11688,7 @@ bbr_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) &&
(SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) {
tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT);
ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen);
return (1);
}
@@ -11765,7 +11714,7 @@ bbr_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
}
retval = (*bbr->r_substate) (m, th, so,
tp, &to, drop_hdrlen,
tlen, tiwin, thflags, nxt_pkt);
tlen, tiwin, thflags, nxt_pkt, iptos);
#ifdef BBR_INVARIANTS
if ((retval == 0) &&
(tp->t_inpcb == NULL)) {
@@ -11969,14 +11918,7 @@ bbr_do_send_accounting(struct tcpcb *tp, struct tcp_bbr *bbr, struct bbr_sendmap
bbr_do_error_accounting(tp, bbr, rsm, len, error);
return;
}
if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
/* Window probe */
KMOD_TCPSTAT_INC(tcps_sndprobe);
#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats,
VOI_TCP_RETXPB, len);
#endif
} else if (rsm) {
if (rsm) {
if (rsm->r_flags & BBR_TLP) {
/*
* TLP should not count in retran count, but in its
@@ -12241,7 +12183,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
}
/* Mark that we have called bbr_output(). */
if ((bbr->r_timer_override) ||
(tp->t_flags & TF_FORCEDATA) ||
(tp->t_state < TCPS_ESTABLISHED)) {
/* Timeouts or early states are exempt */
if (inp->inp_in_hpts)
@@ -12577,47 +12518,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
prefetch_rsm = 1;
}
SOCKBUF_LOCK(sb);
/*
* If in persist timeout with window of 0, send 1 byte. Otherwise,
* if window is small but nonzero and time TF_SENTFIN expired, we
* will send what we can and go to transmit state.
*/
if (tp->t_flags & TF_FORCEDATA) {
if ((sendwin == 0) || (sendwin <= (tp->snd_max - tp->snd_una))) {
/*
* If we still have some data to send, then clear
* the FIN bit. Usually this would happen below
* when it realizes that we aren't sending all the
* data. However, if we have exactly 1 byte of
* unsent data, then it won't clear the FIN bit
* below, and if we are in persist state, we wind up
* sending the packet without recording that we sent
* the FIN bit.
*
* We can't just blindly clear the FIN bit, because
* if we don't have any more data to send then the
* probe will be the FIN itself.
*/
if (sb_offset < sbused(sb))
flags &= ~TH_FIN;
sendwin = 1;
} else {
if ((bbr->rc_in_persist != 0) &&
(tp->snd_wnd >= min((bbr->r_ctl.rc_high_rwnd/2),
bbr_minseg(bbr)))) {
/* Exit persists if there is space */
bbr_exit_persist(tp, bbr, cts, __LINE__);
}
if (rsm == NULL) {
/*
* If we are dropping persist mode then we
* need to correct sb_offset if not a
* retransmit.
*/
sb_offset = tp->snd_max - tp->snd_una;
}
}
}
/*
* If snd_nxt == snd_max and we have transmitted a FIN, the
* sb_offset will be > 0 even if so_snd.sb_cc is 0, resulting in a
@@ -12674,7 +12574,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
*/
len = 0;
}
if ((tp->t_flags & TF_FORCEDATA) && (bbr->rc_in_persist)) {
if (bbr->rc_in_persist) {
/*
* We are in persists, figure out if
* a retransmit is available (maybe the previous
@@ -12970,9 +12870,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
if ((tp->snd_una == tp->snd_max) && len) { /* Nothing outstanding */
goto send;
}
if (tp->t_flags & TF_FORCEDATA) { /* typ. timeout case */
goto send;
}
if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) {
goto send;
}
@@ -13013,7 +12910,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
goto send;
}
/*
* Send if we owe the peer an ACK, RST, SYN, or urgent data. ACKNOW
* Send if we owe the peer an ACK, RST, SYN. ACKNOW
* is also a catch-all for the retransmit timer timeout case.
*/
if (tp->t_flags & TF_ACKNOW) {
@@ -13022,9 +12919,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
if (((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0)) {
goto send;
}
if (SEQ_GT(tp->snd_up, tp->snd_una)) {
goto send;
}
/*
* If our state indicates that FIN should be sent and we have not
* yet done so, then we need to send.
@@ -13089,7 +12983,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
}
if (tot_len == 0)
counter_u64_add(bbr_out_size[TCP_MSS_ACCT_JUSTRET], 1);
tp->t_flags &= ~TF_FORCEDATA;
/* Dont update the time if we did not send */
bbr->r_ctl.rc_last_delay_val = 0;
bbr->rc_output_starts_timer = 1;
@@ -13586,8 +13479,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
KMOD_TCPSTAT_INC(tcps_sndacks);
else if (flags & (TH_SYN | TH_FIN | TH_RST))
KMOD_TCPSTAT_INC(tcps_sndctrl);
else if (SEQ_GT(tp->snd_up, tp->snd_una))
KMOD_TCPSTAT_INC(tcps_sndurg);
else
KMOD_TCPSTAT_INC(tcps_sndwinup);
@@ -13774,17 +13665,11 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
tp->t_flags |= TF_RXWIN0SENT;
} else
tp->t_flags &= ~TF_RXWIN0SENT;
if (SEQ_GT(tp->snd_up, tp->snd_max)) {
th->th_urp = htons((u_short)(tp->snd_up - tp->snd_max));
th->th_flags |= TH_URG;
} else
/*
* If no urgent pointer to send, then we pull the urgent
* pointer to the left edge of the send window so that it
* doesn't drift into the send window on sequence number
* wraparound.
*/
tp->snd_up = tp->snd_una; /* drag it along */
/*
* We don't support urgent data, but drag along
* the pointer in case of a stack switch.
*/
tp->snd_up = tp->snd_una;
#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
if (to.to_flags & TOF_SIGNATURE) {
@@ -14125,8 +14010,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
*/
return (0);
}
if (((tp->t_flags & TF_FORCEDATA) == 0) ||
(bbr->rc_in_persist == 0)) {
if (bbr->rc_in_persist == 0) {
/*
* Advance snd_nxt over sequence space of this segment.
*/
@@ -14254,7 +14138,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
tp->t_maxseg = old_maxseg - 40;
bbr_log_msgsize_fail(bbr, tp, len, maxseg, mtu, 0, tso, cts);
}
tp->t_flags &= ~TF_FORCEDATA;
/*
* Nuke all other things that can interfere
* with slot
@@ -14284,7 +14167,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
}
/* FALLTHROUGH */
default:
tp->t_flags &= ~TF_FORCEDATA;
slot = (bbr_error_base_paceout + 3) << bbr->oerror_cnt;
bbr->rc_output_starts_timer = 1;
bbr_start_hpts_timer(bbr, tp, cts, 11, slot, 0);
@@ -14399,7 +14281,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
((flags & TH_RST) == 0) &&
(IN_RECOVERY(tp->t_flags) == 0) &&
(bbr->rc_in_persist == 0) &&
((tp->t_flags & TF_FORCEDATA) == 0) &&
(tot_len < bbr->r_ctl.rc_pace_max_segs)) {
/*
* For non-tso we need to goto again until we have sent out
@@ -14416,10 +14297,14 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
}
rsm = NULL;
sack_rxmit = 0;
tp->t_flags &= ~(TF_ACKNOW | TF_DELACK | TF_FORCEDATA);
tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
goto again;
}
skip_again:
if ((error == 0) && (flags & TH_FIN))
tcp_log_end_status(tp, TCP_EI_STATUS_SERVER_FIN);
if ((error == 0) && (flags & TH_RST))
tcp_log_end_status(tp, TCP_EI_STATUS_SERVER_RST);
if (((flags & (TH_RST | TH_SYN | TH_FIN)) == 0) && tot_len) {
/*
* Calculate/Re-Calculate the hptsi slot in usecs based on
@@ -14429,7 +14314,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
if (bbr->rc_no_pacing)
slot = 0;
}
tp->t_flags &= ~(TF_ACKNOW | TF_DELACK | TF_FORCEDATA);
tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
enobufs:
if (bbr->rc_use_google == 0)
bbr_check_bbr_for_state(bbr, cts, __LINE__, 0);
@@ -15095,6 +14980,13 @@ bbr_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct
return (error);
}
static int
bbr_pru_options(struct tcpcb *tp, int flags)
{
if (flags & PRUS_OOB)
return (EOPNOTSUPP);
return (0);
}
struct tcp_function_block __tcp_bbr = {
.tfb_tcp_block_name = __XSTRING(STACKNAME),
@@ -15111,7 +15003,8 @@ struct tcp_function_block __tcp_bbr = {
.tfb_tcp_timer_stop = bbr_timer_stop,
.tfb_tcp_rexmit_tmr = bbr_remxt_tmr,
.tfb_tcp_handoff_ok = bbr_handoff_ok,
.tfb_tcp_mtu_chg = bbr_mtu_chg
.tfb_tcp_mtu_chg = bbr_mtu_chg,
.tfb_pru_options = bbr_pru_options,
};
static const char *bbr_stack_names[] = {

File diff suppressed because it is too large
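The suppressed diff is presumably sys/netinet/tcp_stacks/rack.c, where the pacing support mentioned in the commit message lands. As a hedged sketch (not from this commit), pacing can then be requested per connection through rack's existing socket options from <netinet/tcp.h>, once the connection has been moved onto the rack stack as in the earlier example:

	int on = 1;

	/* Sketch: ask the rack stack to pace all sends on this connection. */
	if (setsockopt(fd, IPPROTO_TCP, TCP_RACK_PACE_ALWAYS, &on, sizeof(on)) == -1)
		perror("TCP_RACK_PACE_ALWAYS");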


@@ -1,7 +1,5 @@
/*-
* Copyright (c) 2016-9
* Netflix Inc.
* All rights reserved.
* Copyright (c) 2016-2020 Netflix, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -423,6 +421,7 @@ ctf_process_inbound_raw(struct tcpcb *tp, struct socket *so, struct mbuf *m, int
nxt_pkt = 1;
else
nxt_pkt = 0;
KMOD_TCPSTAT_INC(tcps_rcvtotal);
retval = (*tp->t_fb->tfb_do_segment_nounlock)(m, th, so, tp, drop_hdrlen, tlen,
iptos, nxt_pkt, &tv);
if (retval) {
@@ -694,6 +693,7 @@ ctf_process_rst(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcp
tcp_state_change(tp, TCPS_CLOSED);
/* FALLTHROUGH */
default:
tcp_log_end_status(tp, TCP_EI_STATUS_CLIENT_RST);
tp = tcp_close(tp);
}
dropped = 1;
@@ -911,3 +911,24 @@ ctf_decay_count(uint32_t count, uint32_t decay)
decayed_count = count - (uint32_t)perc_count;
return(decayed_count);
}
int32_t
ctf_progress_timeout_check(struct tcpcb *tp, bool log)
{
if (tp->t_maxunacktime && tp->t_acktime && TSTMP_GT(ticks, tp->t_acktime)) {
if ((ticks - tp->t_acktime) >= tp->t_maxunacktime) {
/*
* There is an assumption that the caller
* will drop the connection so we will
* increment the counters here.
*/
if (log)
tcp_log_end_status(tp, TCP_EI_STATUS_PROGRESS);
#ifdef NETFLIX_STATS
KMOD_TCPSTAT_INC(tcps_progdrops);
#endif
return (1);
}
}
return (0);
}
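The per-stack bbr_progress_timeout_check() removed above collapses into this shared helper: stall detection lives here, while logging and the actual connection drop stay with the caller. The resulting caller pattern, as it appears throughout the bbr diff:

	if (ctf_progress_timeout_check(tp, true)) {
		/* The helper only detects the stall; the caller logs and drops. */
		bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__);
		tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT);
		return (1);
	}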


@@ -1,7 +1,7 @@
#ifndef __rack_bbr_common_h__
#define __rack_bbr_common_h__
/*-
* Copyright (c) 2017-9 Netflix, Inc.
* Copyright (c) 2016-2020 Netflix, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -80,7 +80,7 @@
/* Bits per second in bytes per second */
#define FORTY_EIGHT_MBPS 6000000 /* 48 megabits in bytes */
#define THIRTY_MBPS 3750000 /* 30 megabits in bytes */
#define TWENTY_THREE_MBPS 2896000
#define TWENTY_THREE_MBPS 2896000 /* 23 megabits in bytes */
#define FIVETWELVE_MBPS 64000000 /* 512 megabits in bytes */
#define ONE_POINT_TWO_MEG 150000 /* 1.2 megabits in bytes */
@@ -138,5 +138,8 @@ ctf_log_sack_filter(struct tcpcb *tp, int num_sack_blks, struct sackblk *sack_bl
uint32_t
ctf_decay_count(uint32_t count, uint32_t decay_percentage);
int32_t
ctf_progress_timeout_check(struct tcpcb *tp, bool log);
#endif
#endif


@@ -1,7 +1,5 @@
/*-
* Copyright (c) 2016-9
* Netflix Inc. All rights reserved.
* Author Randall R. Stewart
* Copyright (c) 2016-2020 Netflix, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -748,7 +746,7 @@ struct tcp_bbr {
/* First cache line 0x00 */
int32_t(*r_substate) (struct mbuf *, struct tcphdr *,
struct socket *, struct tcpcb *, struct tcpopt *,
int32_t, int32_t, uint32_t, int32_t, int32_t); /* Lock(a) */
int32_t, int32_t, uint32_t, int32_t, int32_t, uint8_t); /* Lock(a) */
struct tcpcb *rc_tp; /* The tcpcb Lock(a) */
struct inpcb *rc_inp; /* The inpcb Lock(a) */
struct timeval rc_tv;
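The trailing uint8_t added to the r_substate signature carries the IP TOS byte: every per-state handler in tcp_bbr.c gains a matching iptos argument (presumably so handlers can examine the ECN bits), and the dispatcher forwards it, as seen earlier in this diff:

	retval = (*bbr->r_substate) (m, th, so,
	    tp, &to, drop_hdrlen,
	    tlen, tiwin, thflags, nxt_pkt, iptos);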


@@ -1,5 +1,5 @@
/*-
* Copyright (c) 2016-9 Netflix, Inc.
* Copyright (c) 2016-2020 Netflix, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -37,10 +37,14 @@
#define RACK_HAS_FIN 0x0040/* segment is sent with fin */
#define RACK_TLP 0x0080/* segment sent as tail-loss-probe */
#define RACK_RWND_COLLAPSED 0x0100/* The peer collapsed the rwnd on the segment */
#define RACK_APP_LIMITED 0x0200/* We went app limited after this send */
#define RACK_WAS_ACKED 0x0400/* a RTO undid the ack, but it already had a rtt calc done */
#define RACK_NUM_OF_RETRANS 3
#define RACK_INITIAL_RTO 1000 /* 1 second in milli seconds */
#define RACK_REQ_AVG 4 /* Must be less than 256 */
struct rack_sendmap {
uint32_t r_start; /* Sequence number of the segment */
uint32_t r_end; /* End seq, this is 1 beyond actually */
@@ -51,10 +55,16 @@ struct rack_sendmap {
* sent */
uint16_t r_flags; /* Flags as defined above */
uint32_t r_tim_lastsent[RACK_NUM_OF_RETRANS];
uint32_t usec_orig_send; /* time of original send in useconds */
uint32_t r_nseq_appl; /* If this one is app limited, this is the nxt seq limited */
uint32_t r_ack_arrival; /* This is the time of ack-arrival (if SACK'd) */
uint8_t r_dupack; /* Dup ack count */
uint8_t r_in_tmap; /* Flag to see if its in the r_tnext array */
uint8_t r_limit_type; /* is this entry counted against a limit? */
uint8_t r_resv[49];
uint8_t r_just_ret : 1, /* After sending, the next pkt was just returned, i.e. limited */
r_one_out_nr : 1, /* Special case 1 outstanding and not in recovery */
r_avail : 6;
uint8_t r_resv[36];
};
RB_HEAD(rack_rb_tree_head, rack_sendmap);
@@ -76,7 +86,10 @@ struct rack_rtt_sample {
uint32_t rs_rtt_lowest;
uint32_t rs_rtt_highest;
uint32_t rs_rtt_cnt;
uint32_t rs_us_rtt;
int32_t confidence;
uint64_t rs_rtt_tot;
uint16_t rs_us_rtrcnt;
};
#define RACK_LOG_TYPE_ACK 0x01
@@ -135,15 +148,57 @@ struct rack_opts_stats {
uint64_t tcp_rack_idle_reduce_high;
uint64_t rack_no_timer_in_hpts;
uint64_t tcp_rack_min_pace_seg;
uint64_t tcp_rack_min_pace;
uint64_t tcp_rack_cheat;
uint64_t tcp_rack_pace_rate_ca;
uint64_t tcp_rack_rr;
uint64_t tcp_rack_do_detection;
uint64_t tcp_rack_rrr_no_conf_rate;
uint64_t tcp_initial_rate;
uint64_t tcp_initial_win;
uint64_t tcp_hdwr_pacing;
uint64_t tcp_gp_inc_ss;
uint64_t tcp_gp_inc_ca;
uint64_t tcp_gp_inc_rec;
uint64_t tcp_rack_force_max_seg;
uint64_t tcp_rack_pace_rate_ss;
uint64_t tcp_rack_pace_rate_rec;
/* Temp counters for dsack */
uint64_t tcp_sack_path_1;
uint64_t tcp_sack_path_2a;
uint64_t tcp_sack_path_2b;
uint64_t tcp_sack_path_3;
uint64_t tcp_sack_path_4;
/* non temp counters */
uint64_t tcp_rack_scwnd;
uint64_t tcp_rack_noprr;
uint64_t tcp_rack_cfg_rate;
uint64_t tcp_timely_dyn;
uint64_t tcp_rack_mbufq;
uint64_t tcp_fillcw;
uint64_t tcp_npush;
uint64_t tcp_lscwnd;
uint64_t tcp_profile;
};
/* RTT shrink reasons */
#define RACK_RTTS_INIT 0
#define RACK_RTTS_NEWRTT 1
#define RACK_RTTS_EXITPROBE 2
#define RACK_RTTS_ENTERPROBE 3
#define RACK_RTTS_REACHTARGET 4
#define RACK_RTTS_SEEHBP 5
#define RACK_RTTS_NOBACKOFF 6
#define RACK_RTTS_SAFETY 7
#define RACK_USE_BEG 1
#define RACK_USE_END 2
#define RACK_USE_END_OR_THACK 3
#define TLP_USE_ID 1 /* Internet draft behavior */
#define TLP_USE_TWO_ONE 2 /* Use 2.1 behavior */
#define TLP_USE_TWO_TWO 3 /* Use 2.2 behavior */
#define RACK_MIN_BW 8000 /* 64kbps in Bps */
#define MIN_GP_WIN 6 /* We need at least 6 MSS in a GP measurement */
#ifdef _KERNEL
#define RACK_OPTS_SIZE (sizeof(struct rack_opts_stats)/sizeof(uint64_t))
extern counter_u64_t rack_opts_arry[RACK_OPTS_SIZE];
@@ -200,10 +255,13 @@ struct rack_control {
* tlp_sending Lock(a) */
struct rack_sendmap *rc_resend; /* something we have been asked to
* resend */
struct timeval rc_last_time_decay; /* SAD time decay happened here */
uint32_t input_pkt;
uint32_t saved_input_pkt;
uint32_t rc_hpts_flags;
uint32_t rc_fixed_pacing_rate_ca;
uint32_t rc_fixed_pacing_rate_rec;
uint32_t rc_fixed_pacing_rate_ss;
uint32_t cwnd_to_use; /* The cwnd in use */
uint32_t rc_timer_exp; /* If a timer ticks of expiry */
uint32_t rc_rack_min_rtt; /* lowest RTT seen Lock(a) */
uint32_t rc_rack_largest_cwnd; /* Largest CWND we have seen Lock(a) */
@@ -223,15 +281,14 @@ struct rack_control {
uint32_t rc_prr_sndcnt; /* Prr sndcnt Lock(a) */
uint32_t rc_sacked; /* Tot sacked on scoreboard Lock(a) */
uint32_t rc_last_tlp_seq; /* Last tlp sequence Lock(a) */
uint32_t xxx_rc_last_tlp_seq; /* Last tlp sequence Lock(a) */
uint32_t rc_prr_delivered; /* during recovery prr var Lock(a) */
uint16_t rc_tlp_send_cnt; /* Number of TLP sends we have done
* since peer spoke to us Lock(a) */
uint16_t rc_tlp_seg_send_cnt; /* Number of times we have TLP sent
uint16_t rc_tlp_cnt_out; /* count of times we have sent a TLP without new data */
uint16_t xxx_rc_tlp_seg_send_cnt; /* Number of times we have TLP sent
* rc_last_tlp_seq Lock(a) */
uint32_t rc_loss_count; /* During recovery how many segments were lost
uint32_t rc_loss_count; /* How many bytes have been retransmitted
* Lock(a) */
uint32_t rc_reorder_fade; /* Socket option value Lock(a) */
@@ -260,39 +317,81 @@ struct rack_control {
struct rack_sendmap *rc_rsm_at_retran; /* Debug variable kept for
* cache line alignment
* Lock(a) */
struct timeval rc_last_ack;
struct rack_sendmap *rc_first_appl; /* Pointer to first app limited */
struct rack_sendmap *rc_end_appl; /* Pointer to last app limited */
/* Cache line split 0x100 */
struct sack_filter rack_sf;
/* Cache line split 0x140 */
/* Flags for various things */
uint32_t last_pacing_time;
uint32_t rc_pace_max_segs;
uint32_t rc_pace_min_segs;
uint32_t rc_app_limited_cnt;
uint16_t rack_per_of_gp_ss; /* 100 = 100%, so from 65536 = 655 x bw */
uint16_t rack_per_of_gp_ca; /* 100 = 100%, so from 65536 = 655 x bw */
uint16_t rack_per_of_gp_rec; /* 100 = 100%, so from 65536 = 655 x bw, 0=off */
uint16_t rack_per_of_gp_probertt; /* 100 = 100%, so from 65536 = 655 x bw, 0=off */
uint32_t rc_high_rwnd;
uint32_t ack_count;
uint32_t sack_count;
uint32_t sack_noextra_move;
uint32_t sack_moved_extra;
struct rack_rtt_sample rack_rs;
const struct tcp_hwrate_limit_table *crte;
uint32_t rc_agg_early;
uint32_t rc_agg_delayed;
uint32_t rc_tlp_rxt_last_time;
uint32_t rc_saved_cwnd;
uint32_t rc_gp_history[RACK_GP_HIST];
uint32_t rc_gp_output_ts;
uint32_t rc_gp_cumack_ts;
struct timeval act_rcv_time;
struct timeval rc_last_time_decay; /* SAD time decay happened here */
uint64_t gp_bw;
uint64_t init_rate;
#ifdef NETFLIX_SHARED_CWND
struct shared_cwnd *rc_scw;
#endif
uint64_t last_gp_comp_bw;
uint64_t last_max_bw; /* Our calculated max b/w last */
struct time_filter_small rc_gp_min_rtt;
int32_t rc_rtt_diff; /* Timely style rtt diff of our gp_srtt */
uint32_t rc_gp_srtt; /* Current GP srtt */
uint32_t rc_prev_gp_srtt; /* Previous RTT */
uint32_t rc_entry_gp_rtt; /* Entry to PRTT gp-rtt */
uint32_t rc_loss_at_start; /* At measurement window where was our lost value */
uint32_t forced_ack_ts;
uint32_t rc_lower_rtt_us_cts; /* Time our GP rtt was last lowered */
uint32_t rc_time_probertt_entered;
uint32_t rc_time_probertt_starts;
uint32_t rc_lowest_us_rtt;
uint32_t rc_highest_us_rtt;
uint32_t rc_last_us_rtt;
uint32_t rc_time_of_last_probertt;
uint32_t rc_target_probertt_flight;
uint32_t rc_probertt_sndmax_atexit; /* Highest sent to in probe-rtt */
uint32_t rc_gp_lowrtt; /* Lowest rtt seen during GPUT measurement */
uint32_t rc_gp_high_rwnd; /* Highest rwnd seen during GPUT measurement */
int32_t rc_scw_index;
uint32_t rc_tlp_threshold; /* Socket option value Lock(a) */
uint16_t rc_early_recovery_segs; /* Socket option value Lock(a) */
uint16_t rc_reorder_shift; /* Socket option value Lock(a) */
uint16_t rc_pkt_delay; /* Socket option value Lock(a) */
uint8_t rc_no_push_at_mrtt; /* No push when we exceed max rtt */
uint8_t num_avg; /* average count before we go to normal decay */
uint8_t rc_prop_rate; /* Socket option value Lock(a) */
uint8_t rc_prop_reduce; /* Socket option value Lock(a) */
uint8_t rc_tlp_cwnd_reduce; /* Socket option value Lock(a) */
uint8_t rc_early_recovery; /* Socket option value Lock(a) */
uint8_t rc_prr_sendalot;/* Socket option value Lock(a) */
uint8_t rc_min_to; /* Socket option value Lock(a) */
uint8_t rc_tlp_rtx_out; /* This is TLPRtxOut in the draft */
uint8_t rc_rate_sample_method;
uint8_t rc_gp_hist_idx: 7,
rc_gp_hist_filled: 1;
uint8_t rc_gp_hist_idx;
};
#define RACK_TIMELY_CNT_BOOST 5 /* At 5th increase boost */
#define RACK_MINRTT_FILTER_TIM 10 /* Seconds */
#ifdef _KERNEL
struct tcp_rack {
@@ -306,39 +405,75 @@ struct tcp_rack {
uint32_t rc_free_cnt; /* Number of free entries on the rc_free list
* Lock(a) */
uint32_t rc_rack_rtt; /* RACK-RTT Lock(a) */
uint16_t r_wanted_output; /* Output routine wanted to be called */
uint16_t r_cpu; /* CPU that the INP is running on Lock(a) */
uint16_t rc_pace_max_segs; /* Socket option value Lock(a) */
uint16_t rc_pace_reduce;/* Socket option value Lock(a) */
uint16_t r_mbuf_queue : 1, /* Do we do mbuf queue for non-paced */
rtt_limit_mul : 4, /* multiply this by low rtt */
r_limit_scw : 1,
r_avail_bits : 10; /* Available */
uint16_t rc_user_set_max_segs; /* Socket option value Lock(a) */
uint16_t forced_ack : 1,
rc_gp_incr : 1,
rc_gp_bwred : 1,
rc_gp_timely_inc_cnt : 3,
rc_gp_timely_dec_cnt : 3,
rc_not_backing_off: 1,
rc_highly_buffered: 1, /* The path is highly buffered */
rc_dragged_bottom: 1,
rc_dack_mode : 1, /* Mac O/S emulation of d-ack */
rc_dack_toggle : 1, /* For Mac O/S emulation of d-ack */
pacing_longer_than_rtt : 1,
rc_gp_filled : 1;
uint8_t r_state; /* Current rack state Lock(a) */
uint8_t rc_tmr_stopped : 7,
t_timers_stopped : 1;
uint8_t rc_enobuf; /* count of enobufs on connection provides
* backoff Lock(a) */
uint8_t rc_enobuf : 7, /* count of enobufs on connection provides */
rc_on_min_to : 1;
uint8_t r_timer_override : 1, /* hpts override Lock(a) */
r_tlp_running : 1, /* Running from a TLP timeout Lock(a) */
r_is_v6 : 1, /* V6 pcb Lock(a) */
rc_in_persist : 1,
rc_last_pto_set : 1, /* XXX not used */
rc_tlp_in_progress : 1,
rc_always_pace : 1, /* Socket option value Lock(a) */
tlp_timer_up : 1; /* The tlp timer is up flag Lock(a) */
uint8_t r_enforce_min_pace : 2,
rc_pace_to_cwnd : 1,
rc_pace_fill_if_rttin_range : 1,
xxx_avail_bits : 1;
uint8_t app_limited_needs_set : 1,
use_fixed_rate : 1,
rc_has_collapsed : 1,
r_rep_attack : 1,
r_rep_reverse : 1,
r_xxx_min_pace_seg_thresh : 3;
uint8_t rack_tlp_threshold_use;
rack_hdrw_pacing : 1, /* We are doing Hardware pacing */
rack_hdw_pace_ena : 1, /* Is hardware pacing enabled? */
rack_attempt_hdwr_pace : 1; /* Did we attempt hdwr pacing (if allowed) */
uint8_t rack_tlp_threshold_use : 3, /* only 1, 2 and 3 used so far */
rack_rec_nonrxt_use_cr : 1,
rack_enable_scwnd : 1,
rack_attempted_scwnd : 1,
rack_no_prr : 1,
rack_scwnd_is_idle : 1;
uint8_t rc_allow_data_af_clo: 1,
delayed_ack : 1,
set_pacing_done_a_iw : 1,
use_rack_cheat : 1,
use_rack_rr : 1,
alloc_limit_reported : 1,
sack_attack_disable : 1,
do_detection : 1,
rc_avail : 1;
uint16_t rack_per_of_gp;
rc_force_max_seg : 1;
uint8_t rack_cwnd_limited : 1,
r_early : 1,
r_late : 1,
r_running_early : 1,
r_running_late : 1,
r_wanted_output: 1,
r_rr_config : 2;
uint16_t rc_init_win : 8,
rc_gp_rtt_set : 1,
rc_gp_dyn_mul : 1,
rc_gp_saw_rec : 1,
rc_gp_saw_ca : 1,
rc_gp_saw_ss : 1,
rc_gp_no_rec_chg : 1,
in_probe_rtt : 1,
measure_saw_probe_rtt : 1;
/* Cache line 2 0x40 */
struct rack_control r_ctl;
} __aligned(CACHE_LINE_SIZE);