tcp: Fix bugs related to the PUSH bit and rack and an ack war
Michael's testing with UDP tunneling found an issue with the PUSH bit, which was only partly fixed in the last commit. The problem is that the left edge gets transmitted before the adjustments are made to the send_map; this means that the right-edge bits must be added only if the entire RSM is being retransmitted. Syzkaller also continued to find a crash, for which Michael sent me the reproducer. It turns out that the reproducer, on the default (freebsd) stack, made the stack get into an ack-war with itself. After fixing the reference issues in rack, the same ack-war was found in rack (and bbr). Basically, what happens is that we go into the reassembly code and lose the FIN bit. The trick here is that we should not go into the reassembly code if tlen == 0, i.e. the peer never sent us anything. That gets the proper action on the FIN bit, but we then end up in LAST_ACK with no timers running. This is because the usrclosed function gets called after the FINs and such have already been exchanged. So when we should be entering FIN_WAIT2 (or even FIN_WAIT1), we get stuck in LAST_ACK. Fixing this means tweaking the usrclosed function so that we properly recognize the condition and drop into FIN_WAIT2, where a timer will allow at least TP_MAXIDLE before closing (to allow time for the peer to retransmit its FIN if the ack is lost). Setting the fast_finwait2 timer can speed this up in testing. Reviewed by: mtuexen, rscheff Sponsored by: Netflix Inc Differential Revision: https://reviews.freebsd.org/D30451
This commit is contained in:
parent
84768d1149
commit
13c0e198ca
@ -3191,8 +3191,10 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
|||||||
* when trimming from the head.
|
* when trimming from the head.
|
||||||
*/
|
*/
|
||||||
tcp_seq temp = save_start;
|
tcp_seq temp = save_start;
|
||||||
thflags = tcp_reass(tp, th, &temp, &tlen, m);
|
if (tlen) {
|
||||||
tp->t_flags |= TF_ACKNOW;
|
thflags = tcp_reass(tp, th, &temp, &tlen, m);
|
||||||
|
tp->t_flags |= TF_ACKNOW;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if ((tp->t_flags & TF_SACK_PERMIT) &&
|
if ((tp->t_flags & TF_SACK_PERMIT) &&
|
||||||
(save_tlen > 0) &&
|
(save_tlen > 0) &&
|
||||||
|
@ -8320,8 +8320,10 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
|||||||
* trimming from the head.
|
* trimming from the head.
|
||||||
*/
|
*/
|
||||||
tcp_seq temp = save_start;
|
tcp_seq temp = save_start;
|
||||||
thflags = tcp_reass(tp, th, &temp, &tlen, m);
|
if (tlen) {
|
||||||
tp->t_flags |= TF_ACKNOW;
|
thflags = tcp_reass(tp, th, &temp, &tlen, m);
|
||||||
|
tp->t_flags |= TF_ACKNOW;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if ((tp->t_flags & TF_SACK_PERMIT) &&
|
if ((tp->t_flags & TF_SACK_PERMIT) &&
|
||||||
(save_tlen > 0) &&
|
(save_tlen > 0) &&
|
||||||
|
@ -6017,7 +6017,7 @@ rack_setup_offset_for_rsm(struct rack_sendmap *src_rsm, struct rack_sendmap *rsm
|
|||||||
struct mbuf *m;
|
struct mbuf *m;
|
||||||
uint32_t soff;
|
uint32_t soff;
|
||||||
|
|
||||||
if (src_rsm->orig_m_len != src_rsm->m->m_len) {
|
if (src_rsm->m && (src_rsm->orig_m_len != src_rsm->m->m_len)) {
|
||||||
/* Fix up the orig_m_len and possibly the mbuf offset */
|
/* Fix up the orig_m_len and possibly the mbuf offset */
|
||||||
rack_adjust_orig_mlen(src_rsm);
|
rack_adjust_orig_mlen(src_rsm);
|
||||||
}
|
}
|
||||||
@ -8818,21 +8818,23 @@ rack_process_to_cumack(struct tcpcb *tp, struct tcp_rack *rack, register uint32_
|
|||||||
rack->r_ctl.rc_gp_cumack_ts = rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)];
|
rack->r_ctl.rc_gp_cumack_ts = rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)];
|
||||||
rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_TRIM_HEAD, th_ack, __LINE__);
|
rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_TRIM_HEAD, th_ack, __LINE__);
|
||||||
/* Now we need to move our offset forward too */
|
/* Now we need to move our offset forward too */
|
||||||
if (rsm->orig_m_len != rsm->m->m_len) {
|
if (rsm->m && (rsm->orig_m_len != rsm->m->m_len)) {
|
||||||
/* Fix up the orig_m_len and possibly the mbuf offset */
|
/* Fix up the orig_m_len and possibly the mbuf offset */
|
||||||
rack_adjust_orig_mlen(rsm);
|
rack_adjust_orig_mlen(rsm);
|
||||||
}
|
}
|
||||||
rsm->soff += (th_ack - rsm->r_start);
|
rsm->soff += (th_ack - rsm->r_start);
|
||||||
rsm->r_start = th_ack;
|
rsm->r_start = th_ack;
|
||||||
/* Now do we need to move the mbuf fwd too? */
|
/* Now do we need to move the mbuf fwd too? */
|
||||||
while (rsm->soff >= rsm->m->m_len) {
|
if (rsm->m) {
|
||||||
rsm->soff -= rsm->m->m_len;
|
while (rsm->soff >= rsm->m->m_len) {
|
||||||
rsm->m = rsm->m->m_next;
|
rsm->soff -= rsm->m->m_len;
|
||||||
KASSERT((rsm->m != NULL),
|
rsm->m = rsm->m->m_next;
|
||||||
(" nrsm:%p hit at soff:%u null m",
|
KASSERT((rsm->m != NULL),
|
||||||
rsm, rsm->soff));
|
(" nrsm:%p hit at soff:%u null m",
|
||||||
|
rsm, rsm->soff));
|
||||||
|
}
|
||||||
|
rsm->orig_m_len = rsm->m->m_len;
|
||||||
}
|
}
|
||||||
rsm->orig_m_len = rsm->m->m_len;
|
|
||||||
if (rack->app_limited_needs_set)
|
if (rack->app_limited_needs_set)
|
||||||
rack_need_set_test(tp, rack, rsm, tp->snd_una, __LINE__, RACK_USE_BEG);
|
rack_need_set_test(tp, rack, rsm, tp->snd_una, __LINE__, RACK_USE_BEG);
|
||||||
}
|
}
|
||||||
@ -9655,7 +9657,7 @@ rack_adjust_sendmap(struct tcp_rack *rack, struct sockbuf *sb, tcp_seq snd_una)
|
|||||||
/* Nothing outstanding */
|
/* Nothing outstanding */
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
while (rsm->m == m) {
|
while (rsm->m && (rsm->m == m)) {
|
||||||
/* one to adjust */
|
/* one to adjust */
|
||||||
#ifdef INVARIANTS
|
#ifdef INVARIANTS
|
||||||
struct mbuf *tm;
|
struct mbuf *tm;
|
||||||
@ -9676,10 +9678,16 @@ rack_adjust_sendmap(struct tcp_rack *rack, struct sockbuf *sb, tcp_seq snd_una)
|
|||||||
}
|
}
|
||||||
rsm->m = tm;
|
rsm->m = tm;
|
||||||
rsm->soff = soff;
|
rsm->soff = soff;
|
||||||
rsm->orig_m_len = rsm->m->m_len;
|
if (tm)
|
||||||
|
rsm->orig_m_len = rsm->m->m_len;
|
||||||
|
else
|
||||||
|
rsm->orig_m_len = 0;
|
||||||
#else
|
#else
|
||||||
rsm->m = sbsndmbuf(sb, (rsm->r_start - snd_una), &rsm->soff);
|
rsm->m = sbsndmbuf(sb, (rsm->r_start - snd_una), &rsm->soff);
|
||||||
rsm->orig_m_len = rsm->m->m_len;
|
if (rsm->m)
|
||||||
|
rsm->orig_m_len = rsm->m->m_len;
|
||||||
|
else
|
||||||
|
rsm->orig_m_len = 0;
|
||||||
#endif
|
#endif
|
||||||
rsm = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree,
|
rsm = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree,
|
||||||
rsm);
|
rsm);
|
||||||
@ -10058,6 +10066,7 @@ rack_validate_fo_sendwin_up(struct tcpcb *tp, struct tcp_rack *rack)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return value of 1, the TCB is unlocked and most
|
* Return value of 1, the TCB is unlocked and most
|
||||||
* likely gone, return value of 0, the TCP is still
|
* likely gone, return value of 0, the TCP is still
|
||||||
@ -10226,9 +10235,10 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
|||||||
* trimming from the head.
|
* trimming from the head.
|
||||||
*/
|
*/
|
||||||
tcp_seq temp = save_start;
|
tcp_seq temp = save_start;
|
||||||
|
if (tlen) {
|
||||||
thflags = tcp_reass(tp, th, &temp, &tlen, m);
|
thflags = tcp_reass(tp, th, &temp, &tlen, m);
|
||||||
tp->t_flags |= TF_ACKNOW;
|
tp->t_flags |= TF_ACKNOW;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if ((tp->t_flags & TF_SACK_PERMIT) &&
|
if ((tp->t_flags & TF_SACK_PERMIT) &&
|
||||||
(save_tlen > 0) &&
|
(save_tlen > 0) &&
|
||||||
@ -12190,7 +12200,10 @@ rack_init(struct tcpcb *tp)
|
|||||||
rsm->r_dupack = 0;
|
rsm->r_dupack = 0;
|
||||||
if (rack->rc_inp->inp_socket->so_snd.sb_mb != NULL) {
|
if (rack->rc_inp->inp_socket->so_snd.sb_mb != NULL) {
|
||||||
rsm->m = sbsndmbuf(&rack->rc_inp->inp_socket->so_snd, 0, &rsm->soff);
|
rsm->m = sbsndmbuf(&rack->rc_inp->inp_socket->so_snd, 0, &rsm->soff);
|
||||||
rsm->orig_m_len = rsm->m->m_len;
|
if (rsm->m)
|
||||||
|
rsm->orig_m_len = rsm->m->m_len;
|
||||||
|
else
|
||||||
|
rsm->orig_m_len = 0;
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
* This can happen if we have a stand-alone FIN or
|
* This can happen if we have a stand-alone FIN or
|
||||||
@ -15074,6 +15087,7 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma
|
|||||||
uint32_t us_cts;
|
uint32_t us_cts;
|
||||||
uint32_t if_hw_tsomaxsegcount = 0, startseq;
|
uint32_t if_hw_tsomaxsegcount = 0, startseq;
|
||||||
uint32_t if_hw_tsomaxsegsize;
|
uint32_t if_hw_tsomaxsegsize;
|
||||||
|
|
||||||
#ifdef INET6
|
#ifdef INET6
|
||||||
struct ip6_hdr *ip6 = NULL;
|
struct ip6_hdr *ip6 = NULL;
|
||||||
|
|
||||||
@ -15183,7 +15197,15 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma
|
|||||||
}
|
}
|
||||||
th->th_seq = htonl(rsm->r_start);
|
th->th_seq = htonl(rsm->r_start);
|
||||||
th->th_ack = htonl(tp->rcv_nxt);
|
th->th_ack = htonl(tp->rcv_nxt);
|
||||||
if(rsm->r_flags & RACK_HAD_PUSH)
|
/*
|
||||||
|
* The PUSH bit should only be applied
|
||||||
|
* if the full retransmission is made. If
|
||||||
|
* we are sending less, then this is the
|
||||||
|
* left hand edge and should not have
|
||||||
|
* the PUSH bit.
|
||||||
|
*/
|
||||||
|
if ((rsm->r_flags & RACK_HAD_PUSH) &&
|
||||||
|
(len == (rsm->r_end - rsm->r_start)))
|
||||||
flags |= TH_PUSH;
|
flags |= TH_PUSH;
|
||||||
th->th_flags = flags;
|
th->th_flags = flags;
|
||||||
th->th_win = htons((u_short)(rack->r_ctl.fsb.recwin >> tp->rcv_scale));
|
th->th_win = htons((u_short)(rack->r_ctl.fsb.recwin >> tp->rcv_scale));
|
||||||
|
@ -2637,6 +2637,22 @@ tcp_usrclosed(struct tcpcb *tp)
|
|||||||
tcp_state_change(tp, TCPS_LAST_ACK);
|
tcp_state_change(tp, TCPS_LAST_ACK);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
if ((tp->t_state == TCPS_LAST_ACK) &&
|
||||||
|
(tp->t_flags & TF_SENTFIN)) {
|
||||||
|
/*
|
||||||
|
* If we have reached LAST_ACK, and
|
||||||
|
* we sent a FIN (e.g. via MSG_EOR), then
|
||||||
|
* we really should move to either FIN_WAIT_1
|
||||||
|
* or FIN_WAIT_2 depending on snd_max/snd_una.
|
||||||
|
*/
|
||||||
|
if (tp->snd_una == tp->snd_max) {
|
||||||
|
/* The FIN is acked */
|
||||||
|
tcp_state_change(tp, TCPS_FIN_WAIT_2);
|
||||||
|
} else {
|
||||||
|
/* The FIN is still outstanding */
|
||||||
|
tcp_state_change(tp, TCPS_FIN_WAIT_1);
|
||||||
|
}
|
||||||
|
}
|
||||||
if (tp->t_state >= TCPS_FIN_WAIT_2) {
|
if (tp->t_state >= TCPS_FIN_WAIT_2) {
|
||||||
soisdisconnected(tp->t_inpcb->inp_socket);
|
soisdisconnected(tp->t_inpcb->inp_socket);
|
||||||
/* Prevent the connection hanging in FIN_WAIT_2 forever. */
|
/* Prevent the connection hanging in FIN_WAIT_2 forever. */
|
||||||
|
Loading…
Reference in New Issue
Block a user