diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index eda9eb46e1eb..67622321a1c9 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -164,8 +164,7 @@ struct inpcbhead tcb; struct inpcbinfo tcbinfo; struct mtx *tcbinfo_mtx; -static void tcp_dooptions(struct tcpcb *, struct tcpopt *, u_char *, - int, int, struct tcphdr *); +static void tcp_dooptions(struct tcpopt *, u_char *, int, int); static void tcp_pulloutofband(struct socket *, struct tcphdr *, struct mbuf *, int); @@ -747,7 +746,7 @@ findpcb: * present in a SYN segment. See tcp_timewait(). */ if (thflags & TH_SYN) - tcp_dooptions((struct tcpcb *)NULL, &to, optp, optlen, 1, th); + tcp_dooptions(&to, optp, optlen, 1); if (tcp_timewait((struct tcptw *)inp->inp_ppcb, &to, th, m, tlen)) goto findpcb; @@ -961,7 +960,7 @@ findpcb: tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif - tcp_dooptions(tp, &to, optp, optlen, 1, th); + tcp_dooptions(&to, optp, optlen, 1); if (!syncache_add(&inc, &to, th, &so, m)) goto drop; if (so == NULL) { @@ -1082,7 +1081,7 @@ after_listen: * for incoming connections is handled in tcp_syncache. * XXX this is traditional behavior, may need to be cleaned up. */ - tcp_dooptions(tp, &to, optp, optlen, thflags & TH_SYN, th); + tcp_dooptions(&to, optp, optlen, thflags & TH_SYN); if (thflags & TH_SYN) { if (to.to_flags & TOF_SCALE) { tp->t_flags |= TF_RCVD_SCALE; @@ -1104,11 +1103,6 @@ after_listen: } - if (tp->sack_enable) { - /* Delete stale (cumulatively acked) SACK holes */ - tcp_del_sackholes(tp, th); - } - /* * Header prediction: check for the two common cases * of a uni-directional data xfer. 
If the packet has @@ -1153,7 +1147,7 @@ after_listen: ((!tcp_do_newreno && !tp->sack_enable && tp->t_dupacks < tcprexmtthresh) || ((tcp_do_newreno || tp->sack_enable) && - !IN_FASTRECOVERY(tp)))) { + !IN_FASTRECOVERY(tp) && to.to_nsacks == 0))) { KASSERT(headlocked, ("headlocked")); INP_INFO_WUNLOCK(&tcbinfo); headlocked = 0; @@ -1824,6 +1818,12 @@ trimthenstep6: case TCPS_LAST_ACK: case TCPS_TIME_WAIT: KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait")); + if (SEQ_GT(th->th_ack, tp->snd_max)) { + tcpstat.tcps_rcvacktoomuch++; + goto dropafterack; + } + if (tp->sack_enable) + tcp_sack_doack(tp, &to, th->th_ack); if (SEQ_LEQ(th->th_ack, tp->snd_una)) { if (tlen == 0 && tiwin == tp->snd_wnd) { tcpstat.tcps_rcvdupack++; @@ -2002,10 +2002,6 @@ trimthenstep6: tp->snd_cwnd = tp->snd_ssthresh; } tp->t_dupacks = 0; - if (SEQ_GT(th->th_ack, tp->snd_max)) { - tcpstat.tcps_rcvacktoomuch++; - goto dropafterack; - } /* * If we reach this point, ACK is not a duplicate, * i.e., it ACKs something we sent. @@ -2560,13 +2556,11 @@ drop: * Parse TCP options and place in tcpopt. 
*/ static void -tcp_dooptions(tp, to, cp, cnt, is_syn, th) - struct tcpcb *tp; +tcp_dooptions(to, cp, cnt, is_syn) struct tcpopt *to; u_char *cp; int cnt; int is_syn; - struct tcphdr *th; { int opt, optlen; @@ -2642,10 +2636,12 @@ tcp_dooptions(tp, to, cp, cnt, is_syn, th) to->to_flags |= TOF_SACK; } break; - case TCPOPT_SACK: - if (!tp || tcp_sack_option(tp, th, cp, optlen)) + if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0) continue; + to->to_nsacks = (optlen - 2) / TCPOLEN_SACK; + to->to_sacks = cp + 2; + tcpstat.tcps_sack_rcv_blocks++; break; default: continue; diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c index eda9eb46e1eb..67622321a1c9 100644 --- a/sys/netinet/tcp_reass.c +++ b/sys/netinet/tcp_reass.c @@ -164,8 +164,7 @@ struct inpcbhead tcb; struct inpcbinfo tcbinfo; struct mtx *tcbinfo_mtx; -static void tcp_dooptions(struct tcpcb *, struct tcpopt *, u_char *, - int, int, struct tcphdr *); +static void tcp_dooptions(struct tcpopt *, u_char *, int, int); static void tcp_pulloutofband(struct socket *, struct tcphdr *, struct mbuf *, int); @@ -747,7 +746,7 @@ findpcb: * present in a SYN segment. See tcp_timewait(). */ if (thflags & TH_SYN) - tcp_dooptions((struct tcpcb *)NULL, &to, optp, optlen, 1, th); + tcp_dooptions(&to, optp, optlen, 1); if (tcp_timewait((struct tcptw *)inp->inp_ppcb, &to, th, m, tlen)) goto findpcb; @@ -961,7 +960,7 @@ findpcb: tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif - tcp_dooptions(tp, &to, optp, optlen, 1, th); + tcp_dooptions(&to, optp, optlen, 1); if (!syncache_add(&inc, &to, th, &so, m)) goto drop; if (so == NULL) { @@ -1082,7 +1081,7 @@ after_listen: * for incoming connections is handled in tcp_syncache. * XXX this is traditional behavior, may need to be cleaned up. 
*/ - tcp_dooptions(tp, &to, optp, optlen, thflags & TH_SYN, th); + tcp_dooptions(&to, optp, optlen, thflags & TH_SYN); if (thflags & TH_SYN) { if (to.to_flags & TOF_SCALE) { tp->t_flags |= TF_RCVD_SCALE; @@ -1104,11 +1103,6 @@ after_listen: } - if (tp->sack_enable) { - /* Delete stale (cumulatively acked) SACK holes */ - tcp_del_sackholes(tp, th); - } - /* * Header prediction: check for the two common cases * of a uni-directional data xfer. If the packet has @@ -1153,7 +1147,7 @@ after_listen: ((!tcp_do_newreno && !tp->sack_enable && tp->t_dupacks < tcprexmtthresh) || ((tcp_do_newreno || tp->sack_enable) && - !IN_FASTRECOVERY(tp)))) { + !IN_FASTRECOVERY(tp) && to.to_nsacks == 0))) { KASSERT(headlocked, ("headlocked")); INP_INFO_WUNLOCK(&tcbinfo); headlocked = 0; @@ -1824,6 +1818,12 @@ trimthenstep6: case TCPS_LAST_ACK: case TCPS_TIME_WAIT: KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait")); + if (SEQ_GT(th->th_ack, tp->snd_max)) { + tcpstat.tcps_rcvacktoomuch++; + goto dropafterack; + } + if (tp->sack_enable) + tcp_sack_doack(tp, &to, th->th_ack); if (SEQ_LEQ(th->th_ack, tp->snd_una)) { if (tlen == 0 && tiwin == tp->snd_wnd) { tcpstat.tcps_rcvdupack++; @@ -2002,10 +2002,6 @@ trimthenstep6: tp->snd_cwnd = tp->snd_ssthresh; } tp->t_dupacks = 0; - if (SEQ_GT(th->th_ack, tp->snd_max)) { - tcpstat.tcps_rcvacktoomuch++; - goto dropafterack; - } /* * If we reach this point, ACK is not a duplicate, * i.e., it ACKs something we sent. @@ -2560,13 +2556,11 @@ drop: * Parse TCP options and place in tcpopt. 
*/ static void -tcp_dooptions(tp, to, cp, cnt, is_syn, th) - struct tcpcb *tp; +tcp_dooptions(to, cp, cnt, is_syn) struct tcpopt *to; u_char *cp; int cnt; int is_syn; - struct tcphdr *th; { int opt, optlen; @@ -2642,10 +2636,12 @@ tcp_dooptions(tp, to, cp, cnt, is_syn, th) to->to_flags |= TOF_SACK; } break; - case TCPOPT_SACK: - if (!tp || tcp_sack_option(tp, th, cp, optlen)) + if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0) continue; + to->to_nsacks = (optlen - 2) / TCPOLEN_SACK; + to->to_sacks = cp + 2; + tcpstat.tcps_sack_rcv_blocks++; break; default: continue; diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c index 945ab9ecf135..c15f827b568e 100644 --- a/sys/netinet/tcp_sack.c +++ b/sys/netinet/tcp_sack.c @@ -373,54 +373,54 @@ tcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole) } /* - * Process the TCP SACK option. Returns 1 if tcp_dooptions() should continue, - * and 0 otherwise, if the option was fine. tp->snd_holes is an ordered list - * of holes (oldest to newest, in terms of the sequence space). + * Process cumulative ACK and the TCP SACK option to update the scoreboard. + * tp->snd_holes is an ordered list of holes (oldest to newest, in terms of + * the sequence space). 
*/ -int -tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen) +void +tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) { - int tmp_olen; - u_char *tmp_cp; struct sackhole *cur, *temp; - struct sackblk sack, sack_blocks[TCP_MAX_SACK], *sblkp; + struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp; int i, j, num_sack_blks; INP_LOCK_ASSERT(tp->t_inpcb); - if (!tp->sack_enable) - return (1); - if ((th->th_flags & TH_ACK) == 0) - return (1); - /* Note: TCPOLEN_SACK must be 2*sizeof(tcp_seq) */ - if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0) - return (1); - /* If ack is outside [snd_una, snd_max], ignore the SACK options */ - if (SEQ_LT(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max)) - return (1); - tmp_cp = cp + 2; - tmp_olen = optlen - 2; - tcpstat.tcps_sack_rcv_blocks++; - /* - * Sort the SACK blocks so we can update the scoreboard - * with just one pass. The overhead of sorting upto 4 elements - * is less than making upto 4 passes over the scoreboard. - */ + num_sack_blks = 0; - while (tmp_olen > 0) { - bcopy(tmp_cp, &sack, sizeof(sack)); + /* + * If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist, + * treat [SND.UNA, SEG.ACK) as if it is a SACK block. + */ + if (SEQ_LT(tp->snd_una, th_ack) && !TAILQ_EMPTY(&tp->snd_holes)) { + sack_blocks[num_sack_blks].start = tp->snd_una; + sack_blocks[num_sack_blks++].end = th_ack; + } + /* + * Append received valid SACK blocks to sack_blocks[]. 
+ */ + for (i = 0; i < to->to_nsacks; i++) { + bcopy((to->to_sacks + i * TCPOLEN_SACK), &sack, sizeof(sack)); sack.start = ntohl(sack.start); sack.end = ntohl(sack.end); if (SEQ_GT(sack.end, sack.start) && SEQ_GT(sack.start, tp->snd_una) && - SEQ_GT(sack.start, th->th_ack) && + SEQ_GT(sack.start, th_ack) && SEQ_LEQ(sack.end, tp->snd_max)) sack_blocks[num_sack_blks++] = sack; - tmp_olen -= TCPOLEN_SACK; - tmp_cp += TCPOLEN_SACK; } + + /* + * Return if SND.UNA is not advanced and no valid SACK block + * is received. + */ if (num_sack_blks == 0) - return 0; - /* Bubble sort */ + return; + + /* + * Sort the SACK blocks so we can update the scoreboard + * with just one pass. The overhead of sorting up to 4+1 elements + * is less than making up to 4+1 passes over the scoreboard. + */ for (i = 0; i < num_sack_blks; i++) { for (j = i + 1; j < num_sack_blks; j++) { if (SEQ_GT(sack_blocks[i].end, sack_blocks[j].end)) { @@ -437,7 +437,7 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen) * (from the sack blocks received) are created later below (in * the logic that adds holes to the tail of the scoreboard). */ - tp->snd_fack = tp->snd_una; + tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack); /* * In the while-loop below, incoming SACK blocks (sack_blocks[]) * and SACK holes (snd_holes) are traversed from their tails with @@ -460,7 +460,7 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen) */ temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL); if (temp == NULL) - return 0; + return; tp->snd_fack = sblkp->end; /* Go to the previous sack block. */ sblkp--; @@ -548,48 +548,11 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen) else sblkp--; } - return (0); } /* - * Delete stale (i.e, cumulatively ack'd) holes. Hole is deleted only if - * it is completely acked; otherwise, tcp_sack_option(), called from - * tcp_dooptions(), will fix up the hole. + * Free all SACK holes to clear the scoreboard. 
*/ -void -tcp_del_sackholes(tp, th) - struct tcpcb *tp; - struct tcphdr *th; -{ - INP_LOCK_ASSERT(tp->t_inpcb); - if (tp->sack_enable && tp->t_state != TCPS_LISTEN) { - /* max because this could be an older ack just arrived */ - tcp_seq lastack = SEQ_GT(th->th_ack, tp->snd_una) ? - th->th_ack : tp->snd_una; - struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes); - struct sackhole *prev; - while (cur) - if (SEQ_LEQ(cur->end, lastack)) { - prev = cur; - cur = TAILQ_NEXT(cur, scblink); - tp->sackhint.sack_bytes_rexmit -= - (prev->rxmit - prev->start); - tcp_sackhole_remove(tp, prev); - } else if (SEQ_LT(cur->start, lastack)) { - if (SEQ_LT(cur->rxmit, lastack)) { - tp->sackhint.sack_bytes_rexmit -= - (cur->rxmit - cur->start); - cur->rxmit = lastack; - } else - tp->sackhint.sack_bytes_rexmit -= - (lastack - cur->start); - cur->start = lastack; - break; - } else - break; - } -} - void tcp_free_sackholes(struct tcpcb *tp) { diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index d2f34ea9c088..b1515ddf6d29 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -238,7 +238,8 @@ struct tcpopt { u_int32_t to_tsecr; u_int16_t to_mss; u_int8_t to_requested_s_scale; - u_int8_t to_pad; + u_int8_t to_nsacks; /* number of SACK blocks */ + u_char *to_sacks; /* pointer to the first SACK block */ }; #ifdef _NETINET_IN_PCB_H_ @@ -578,9 +579,8 @@ extern u_long tcp_sendspace; extern u_long tcp_recvspace; tcp_seq tcp_new_isn(struct tcpcb *); -int tcp_sack_option(struct tcpcb *,struct tcphdr *,u_char *,int); +void tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq); void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend); -void tcp_del_sackholes(struct tcpcb *, struct tcphdr *); void tcp_clean_sackreport(struct tcpcb *tp); void tcp_sack_adjust(struct tcpcb *tp); struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);