tcp: Fix bugs related to the PUSH bit and rack and an ack war

Michael's testing with UDP tunneling found an issue with the PUSH bit, which was only partly fixed
in the last commit. The problem is that the left edge gets transmitted before the adjustments are made
to the send_map; this means that the right-edge bits must only be added if
the entire RSM is being retransmitted.

Syzkaller also continued to find a crash, for which Michael sent me the reproducer. It turns
out that the reproducer, on the default (freebsd) stack, made the stack get into an ack-war with itself.
After fixing the reference issues in rack, the same ack-war was found in rack (and bbr). Basically,
what happens is that we go into the reassembly code and lose the FIN bit. The trick here is that we
should not go into the reassembly code if tlen == 0, i.e. the peer never sent us any data.
That then gets the proper action on the FIN bit, but then we end up in LAST_ACK with no
timers running. This is because the usrclosed function gets called and the FINs and such have
already been exchanged. So when we should be entering FIN_WAIT2 (or even FIN_WAIT1), we get
stuck in LAST_ACK. Fixing this means tweaking the usrclosed function so that we properly
recognize the condition and drop into FIN_WAIT2, where a timer will allow at least TP_MAXIDLE
before closing (to allow time for the peer to retransmit its FIN if the ack is lost). Setting the fast_finwait2
timer can speed this up in testing.

Reviewed by: mtuexen,rscheff
Sponsored by: Netflix Inc
Differential Revision:	https://reviews.freebsd.org/D30451
This commit is contained in:
Randall Stewart 2021-05-25 13:23:31 -04:00
parent 84768d1149
commit 13c0e198ca
4 changed files with 63 additions and 21 deletions

View File

@ -3191,8 +3191,10 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* when trimming from the head. * when trimming from the head.
*/ */
tcp_seq temp = save_start; tcp_seq temp = save_start;
thflags = tcp_reass(tp, th, &temp, &tlen, m); if (tlen) {
tp->t_flags |= TF_ACKNOW; thflags = tcp_reass(tp, th, &temp, &tlen, m);
tp->t_flags |= TF_ACKNOW;
}
} }
if ((tp->t_flags & TF_SACK_PERMIT) && if ((tp->t_flags & TF_SACK_PERMIT) &&
(save_tlen > 0) && (save_tlen > 0) &&

View File

@ -8320,8 +8320,10 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
* trimming from the head. * trimming from the head.
*/ */
tcp_seq temp = save_start; tcp_seq temp = save_start;
thflags = tcp_reass(tp, th, &temp, &tlen, m); if (tlen) {
tp->t_flags |= TF_ACKNOW; thflags = tcp_reass(tp, th, &temp, &tlen, m);
tp->t_flags |= TF_ACKNOW;
}
} }
if ((tp->t_flags & TF_SACK_PERMIT) && if ((tp->t_flags & TF_SACK_PERMIT) &&
(save_tlen > 0) && (save_tlen > 0) &&

View File

@ -6017,7 +6017,7 @@ rack_setup_offset_for_rsm(struct rack_sendmap *src_rsm, struct rack_sendmap *rsm
struct mbuf *m; struct mbuf *m;
uint32_t soff; uint32_t soff;
if (src_rsm->orig_m_len != src_rsm->m->m_len) { if (src_rsm->m && (src_rsm->orig_m_len != src_rsm->m->m_len)) {
/* Fix up the orig_m_len and possibly the mbuf offset */ /* Fix up the orig_m_len and possibly the mbuf offset */
rack_adjust_orig_mlen(src_rsm); rack_adjust_orig_mlen(src_rsm);
} }
@ -8818,21 +8818,23 @@ rack_process_to_cumack(struct tcpcb *tp, struct tcp_rack *rack, register uint32_
rack->r_ctl.rc_gp_cumack_ts = rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)]; rack->r_ctl.rc_gp_cumack_ts = rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)];
rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_TRIM_HEAD, th_ack, __LINE__); rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_TRIM_HEAD, th_ack, __LINE__);
/* Now we need to move our offset forward too */ /* Now we need to move our offset forward too */
if (rsm->orig_m_len != rsm->m->m_len) { if (rsm->m && (rsm->orig_m_len != rsm->m->m_len)) {
/* Fix up the orig_m_len and possibly the mbuf offset */ /* Fix up the orig_m_len and possibly the mbuf offset */
rack_adjust_orig_mlen(rsm); rack_adjust_orig_mlen(rsm);
} }
rsm->soff += (th_ack - rsm->r_start); rsm->soff += (th_ack - rsm->r_start);
rsm->r_start = th_ack; rsm->r_start = th_ack;
/* Now do we need to move the mbuf fwd too? */ /* Now do we need to move the mbuf fwd too? */
while (rsm->soff >= rsm->m->m_len) { if (rsm->m) {
rsm->soff -= rsm->m->m_len; while (rsm->soff >= rsm->m->m_len) {
rsm->m = rsm->m->m_next; rsm->soff -= rsm->m->m_len;
KASSERT((rsm->m != NULL), rsm->m = rsm->m->m_next;
(" nrsm:%p hit at soff:%u null m", KASSERT((rsm->m != NULL),
rsm, rsm->soff)); (" nrsm:%p hit at soff:%u null m",
rsm, rsm->soff));
}
rsm->orig_m_len = rsm->m->m_len;
} }
rsm->orig_m_len = rsm->m->m_len;
if (rack->app_limited_needs_set) if (rack->app_limited_needs_set)
rack_need_set_test(tp, rack, rsm, tp->snd_una, __LINE__, RACK_USE_BEG); rack_need_set_test(tp, rack, rsm, tp->snd_una, __LINE__, RACK_USE_BEG);
} }
@ -9655,7 +9657,7 @@ rack_adjust_sendmap(struct tcp_rack *rack, struct sockbuf *sb, tcp_seq snd_una)
/* Nothing outstanding */ /* Nothing outstanding */
return; return;
} }
while (rsm->m == m) { while (rsm->m && (rsm->m == m)) {
/* one to adjust */ /* one to adjust */
#ifdef INVARIANTS #ifdef INVARIANTS
struct mbuf *tm; struct mbuf *tm;
@ -9676,10 +9678,16 @@ rack_adjust_sendmap(struct tcp_rack *rack, struct sockbuf *sb, tcp_seq snd_una)
} }
rsm->m = tm; rsm->m = tm;
rsm->soff = soff; rsm->soff = soff;
rsm->orig_m_len = rsm->m->m_len; if (tm)
rsm->orig_m_len = rsm->m->m_len;
else
rsm->orig_m_len = 0;
#else #else
rsm->m = sbsndmbuf(sb, (rsm->r_start - snd_una), &rsm->soff); rsm->m = sbsndmbuf(sb, (rsm->r_start - snd_una), &rsm->soff);
rsm->orig_m_len = rsm->m->m_len; if (rsm->m)
rsm->orig_m_len = rsm->m->m_len;
else
rsm->orig_m_len = 0;
#endif #endif
rsm = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree, rsm = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree,
rsm); rsm);
@ -10058,6 +10066,7 @@ rack_validate_fo_sendwin_up(struct tcpcb *tp, struct tcp_rack *rack)
} }
} }
/* /*
* Return value of 1, the TCB is unlocked and most * Return value of 1, the TCB is unlocked and most
* likely gone, return value of 0, the TCP is still * likely gone, return value of 0, the TCP is still
@ -10226,9 +10235,10 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
* trimming from the head. * trimming from the head.
*/ */
tcp_seq temp = save_start; tcp_seq temp = save_start;
if (tlen) {
thflags = tcp_reass(tp, th, &temp, &tlen, m); thflags = tcp_reass(tp, th, &temp, &tlen, m);
tp->t_flags |= TF_ACKNOW; tp->t_flags |= TF_ACKNOW;
}
} }
if ((tp->t_flags & TF_SACK_PERMIT) && if ((tp->t_flags & TF_SACK_PERMIT) &&
(save_tlen > 0) && (save_tlen > 0) &&
@ -12190,7 +12200,10 @@ rack_init(struct tcpcb *tp)
rsm->r_dupack = 0; rsm->r_dupack = 0;
if (rack->rc_inp->inp_socket->so_snd.sb_mb != NULL) { if (rack->rc_inp->inp_socket->so_snd.sb_mb != NULL) {
rsm->m = sbsndmbuf(&rack->rc_inp->inp_socket->so_snd, 0, &rsm->soff); rsm->m = sbsndmbuf(&rack->rc_inp->inp_socket->so_snd, 0, &rsm->soff);
rsm->orig_m_len = rsm->m->m_len; if (rsm->m)
rsm->orig_m_len = rsm->m->m_len;
else
rsm->orig_m_len = 0;
} else { } else {
/* /*
* This can happen if we have a stand-alone FIN or * This can happen if we have a stand-alone FIN or
@ -15074,6 +15087,7 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma
uint32_t us_cts; uint32_t us_cts;
uint32_t if_hw_tsomaxsegcount = 0, startseq; uint32_t if_hw_tsomaxsegcount = 0, startseq;
uint32_t if_hw_tsomaxsegsize; uint32_t if_hw_tsomaxsegsize;
#ifdef INET6 #ifdef INET6
struct ip6_hdr *ip6 = NULL; struct ip6_hdr *ip6 = NULL;
@ -15183,7 +15197,15 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma
} }
th->th_seq = htonl(rsm->r_start); th->th_seq = htonl(rsm->r_start);
th->th_ack = htonl(tp->rcv_nxt); th->th_ack = htonl(tp->rcv_nxt);
if(rsm->r_flags & RACK_HAD_PUSH) /*
* The PUSH bit should only be applied
* if the full retransmission is made. If
* we are sending less than this is the
* left hand edge and should not have
* the PUSH bit.
*/
if ((rsm->r_flags & RACK_HAD_PUSH) &&
(len == (rsm->r_end - rsm->r_start)))
flags |= TH_PUSH; flags |= TH_PUSH;
th->th_flags = flags; th->th_flags = flags;
th->th_win = htons((u_short)(rack->r_ctl.fsb.recwin >> tp->rcv_scale)); th->th_win = htons((u_short)(rack->r_ctl.fsb.recwin >> tp->rcv_scale));

View File

@ -2637,6 +2637,22 @@ tcp_usrclosed(struct tcpcb *tp)
tcp_state_change(tp, TCPS_LAST_ACK); tcp_state_change(tp, TCPS_LAST_ACK);
break; break;
} }
if ((tp->t_state == TCPS_LAST_ACK) &&
(tp->t_flags & TF_SENTFIN)) {
/*
* If we have reached LAST_ACK, and
* we sent a FIN (e.g. via MSG_EOR), then
* we really should move to either FIN_WAIT_1
* or FIN_WAIT_2 depending on snd_max/snd_una.
*/
if (tp->snd_una == tp->snd_max) {
/* The FIN is acked */
tcp_state_change(tp, TCPS_FIN_WAIT_2);
} else {
/* The FIN is still outstanding */
tcp_state_change(tp, TCPS_FIN_WAIT_1);
}
}
if (tp->t_state >= TCPS_FIN_WAIT_2) { if (tp->t_state >= TCPS_FIN_WAIT_2) {
soisdisconnected(tp->t_inpcb->inp_socket); soisdisconnected(tp->t_inpcb->inp_socket);
/* Prevent the connection hanging in FIN_WAIT_2 forever. */ /* Prevent the connection hanging in FIN_WAIT_2 forever. */