From 9d17a7a64a961598726ae566326b7389547e4081 Mon Sep 17 00:00:00 2001 From: Paul Saab Date: Sat, 4 Jun 2005 08:03:28 +0000 Subject: [PATCH] Changes to tcp_sack_option() that - Walks the scoreboard backwards from the tail to reduce the number of comparisons for each sack option received. - Introduce functions to add/remove sack scoreboard elements, making the code more readable. Submitted by: Noritoshi Demizu Reviewed by: Raja Mukerji, Mohan Srinivasan --- sys/netinet/tcp_sack.c | 212 +++++++++++++++++++++++------------------ sys/netinet/tcp_var.h | 3 +- 2 files changed, 121 insertions(+), 94 deletions(-) diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c index 5ff7c34abc4b..28c6d8a4d420 100644 --- a/sys/netinet/tcp_sack.c +++ b/sys/netinet/tcp_sack.c @@ -328,6 +328,50 @@ tcp_sackhole_free(struct tcpcb *tp, struct sackhole *hole) KASSERT(tcp_sack_globalholes >= 0, ("tcp_sack_globalholes >= 0")); } +/* + * Insert new SACK hole into scoreboard. + */ +static struct sackhole * +tcp_sackhole_insert(struct tcpcb *tp, tcp_seq start, tcp_seq end, + struct sackhole *after) +{ + struct sackhole *hole; + + /* Allocate a new SACK hole. */ + hole = tcp_sackhole_alloc(tp, start, end); + if (hole == NULL) + return NULL; + + /* Insert the new SACK hole into scoreboard */ + if (after != NULL) + TAILQ_INSERT_AFTER(&tp->snd_holes, after, hole, scblink); + else + TAILQ_INSERT_TAIL(&tp->snd_holes, hole, scblink); + + /* Update SACK hint. */ + if (tp->sackhint.nexthole == NULL) + tp->sackhint.nexthole = hole; + + return hole; +} + +/* + * Remove SACK hole from scoreboard. + */ +static void +tcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole) +{ + /* Update SACK hint. */ + if (tp->sackhint.nexthole == hole) + tp->sackhint.nexthole = TAILQ_NEXT(hole, scblink); + + /* Remove this SACK hole. */ + TAILQ_REMOVE(&tp->snd_holes, hole, scblink); + + /* Free this SACK hole. */ + tcp_sackhole_free(tp, hole); +} + /* * Process the TCP SACK option. Returns 1 if tcp_dooptions() should continue, * and 0 otherwise, if the option was fine. tp->snd_holes is an ordered list @@ -339,8 +383,8 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen) int tmp_olen; u_char *tmp_cp; struct sackhole *cur, *temp; - struct sackblk sack, sack_blocks[TCP_MAX_SACK]; - int i, j, next_sack_blk, num_sack_blks; + struct sackblk sack, sack_blocks[TCP_MAX_SACK], *sblkp; + int i, j, num_sack_blks; INP_LOCK_ASSERT(tp->t_inpcb); if (!tp->sack_enable) @@ -359,7 +403,7 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen) /* * Sort the SACK blocks so we can update the scoreboard * with just one pass. The overhead of sorting upto 4 elements - * is less than making 3 passes over the scoreboard. + * is less than making upto 4 passes over the scoreboard. */ num_sack_blks = 0; while (tmp_olen > 0) { @@ -379,8 +423,7 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen) /* Bubble sort */ for (i = 0; i < num_sack_blks; i++) { for (j = i + 1; j < num_sack_blks; j++) { - if (SEQ_GT(sack_blocks[i].start, - sack_blocks[j].start)){ + if (SEQ_GT(sack_blocks[i].end, sack_blocks[j].end)) { sack = sack_blocks[i]; sack_blocks[i] = sack_blocks[j]; sack_blocks[j] = sack; @@ -395,52 +438,66 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen) * the logic that adds holes to the tail of the scoreboard). */ tp->snd_fack = tp->snd_una; - next_sack_blk = 0; - cur = TAILQ_FIRST(&tp->snd_holes); + /* + * In the while-loop below, incoming SACK blocks (sack_blocks[]) + * and SACK holes (snd_holes) are traversed from their tails with + * just one pass in order to reduce the number of compares especially + * when the bandwidth-delay product is large. + * Note: Typically, in the first RTT of SACK recovery, the highest + * three or four SACK blocks with the same ack number are received. + * In the second RTT, if retransmitted data segments are not lost, + * the highest three or four SACK blocks with ack number advancing + * are received. + */ + sblkp = &sack_blocks[num_sack_blks - 1]; /* Last SACK block */ + if (SEQ_LT(tp->snd_fack, sblkp->start)) { + /* + * The highest SACK block is beyond fack. + * Append new SACK hole at the tail. + * If the second or later highest SACK blocks are also + * beyond the current fack, they will be inserted by + * way of hole splitting in the while-loop below. + */ + tcp_sackhole_insert(tp, tp->snd_fack, sblkp->start, NULL); + tp->snd_fack = sblkp->end; + /* Go to the previous sack block. */ + sblkp--; + } else if (SEQ_LT(tp->snd_fack, sblkp->end)) + /* fack is advanced. */ + tp->snd_fack = sblkp->end; + cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole */ /* * Since the incoming sack blocks are sorted, we can process them * making one sweep of the scoreboard. */ - while ((next_sack_blk < num_sack_blks) && (cur != NULL)) { - sack = sack_blocks[next_sack_blk]; - if (SEQ_LT(tp->snd_fack, sack.start)) - /* - * The sack block acks data to the right of all the holes - * in the scoreboard. No need to iterate over the - * scoreboard anymore. - */ - break; - if (SEQ_LEQ(sack.end, cur->start)) { - /* - * SACKs data before the current hole. - * Ignore the sack block. Go to the next sack - * block. - */ - next_sack_blk++; - continue; - } - if (SEQ_GEQ(sack.start, cur->end)) { + while (sblkp - sack_blocks >= 0) { + KASSERT(cur != NULL, ("cur != NULL")); + if (SEQ_GEQ(sblkp->start, cur->end)) { /* * SACKs data beyond the current hole. - * Go to the next hole. + * Go to the previous sack block. */ - cur = TAILQ_NEXT(cur, scblink); + sblkp--; + continue; + } + if (SEQ_LEQ(sblkp->end, cur->start)) { + /* + * SACKs data before the current hole. + * Go to the previous hole. + */ + cur = TAILQ_PREV(cur, sackhole_head, scblink); continue; } tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start); KASSERT(tp->sackhint.sack_bytes_rexmit >= 0, ("sackhint bytes rtx >= 0")); - if (SEQ_LEQ(sack.start, cur->start)) { + if (SEQ_LEQ(sblkp->start, cur->start)) { /* Data acks at least the beginning of hole */ - if (SEQ_GEQ(sack.end, cur->end)) { + if (SEQ_GEQ(sblkp->end, cur->end)) { /* Acks entire hole, so delete hole */ - if (tp->sackhint.nexthole == cur) - tp->sackhint.nexthole = - TAILQ_NEXT(cur, scblink); temp = cur; - cur = TAILQ_NEXT(cur, scblink); - TAILQ_REMOVE(&tp->snd_holes, temp, scblink); - tcp_sackhole_free(tp, temp); + cur = TAILQ_PREV(cur, sackhole_head, scblink); + tcp_sackhole_remove(tp, temp); /* * The sack block may ack all or part of the next * hole too, so continue onto the next hole. @@ -448,66 +505,40 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen) continue; } else { /* Move start of hole forward */ - cur->start = sack.end; + cur->start = sblkp->end; cur->rxmit = SEQ_MAX(cur->rxmit, cur->start); } + /* Go to the previous hole. */ + cur = TAILQ_PREV(cur, sackhole_head, scblink); } else { - if (SEQ_GEQ(sack.end, cur->end)) { + /* Data acks at least the end of hole */ + if (SEQ_GEQ(sblkp->end, cur->end)) { /* Move end of hole backward */ - cur->end = sack.start; + cur->end = sblkp->start; cur->rxmit = SEQ_MIN(cur->rxmit, cur->end); } else { /* * ACKs some data in middle of a hole; need to * split current hole */ - temp = tcp_sackhole_alloc(tp, sack.end, - cur->end); + temp = tcp_sackhole_insert(tp, sblkp->end, + cur->end, cur); if (temp != NULL) { - if (SEQ_GT(cur->rxmit, temp->rxmit)) + if (SEQ_GT(cur->rxmit, temp->rxmit)) { temp->rxmit = cur->rxmit; - TAILQ_INSERT_AFTER(&tp->snd_holes, - cur, temp, scblink); - cur->end = sack.start; - cur->rxmit = SEQ_MIN(cur->rxmit, cur->end); - tp->sackhint.sack_bytes_rexmit += - (cur->rxmit - cur->start); - cur = temp; + tp->sackhint.sack_bytes_rexmit + += (temp->rxmit + - temp->start); + } + cur->end = sblkp->start; + cur->rxmit = SEQ_MIN(cur->rxmit, + cur->end); } } + /* Go to the previous sack block. */ + sblkp--; } tp->sackhint.sack_bytes_rexmit += (cur->rxmit - cur->start); - /* - * Testing sack.end against cur->end tells us whether we're done - * with the sack block or the sack hole. Accordingly, we advance - * one or the other. - */ - if (SEQ_GEQ(sack.end, cur->end)) - cur = TAILQ_NEXT(cur, scblink); - else - next_sack_blk++; - } - /* Iterated all the holes in the scoreboard. Add new holes. */ - for ( ; next_sack_blk < num_sack_blks ; next_sack_blk++) { - sack = sack_blocks[next_sack_blk]; - /* - * The two SEQ_LT() checks here that test rcv_laststart against - * sack.start and sack.end seem redundant, but they're necessary - * to deal with overlapping sack blocks. - */ - if (SEQ_LT(tp->snd_fack, sack.start)) { - /* Need to append new hole at end. */ - temp = tcp_sackhole_alloc(tp, tp->snd_fack, - sack.start); - if (temp == NULL) - continue; /* ENOBUFS */ - TAILQ_INSERT_TAIL(&tp->snd_holes, temp, scblink); - tp->snd_fack = sack.end; - if (tp->sackhint.nexthole == NULL) - tp->sackhint.nexthole = temp; - } - if (SEQ_LT(tp->snd_fack, sack.end)) - tp->snd_fack = sack.end; } return (0); } @@ -532,14 +563,10 @@ tcp_del_sackholes(tp, th) while (cur) if (SEQ_LEQ(cur->end, lastack)) { prev = cur; - tp->sackhint.sack_bytes_rexmit -= - (cur->rxmit - cur->start); - if (tp->sackhint.nexthole == cur) - tp->sackhint.nexthole = - TAILQ_NEXT(cur, scblink); cur = TAILQ_NEXT(cur, scblink); - TAILQ_REMOVE(&tp->snd_holes, prev, scblink); - tcp_sackhole_free(tp, prev); + tp->sackhint.sack_bytes_rexmit -= + (prev->rxmit - prev->start); + tcp_sackhole_remove(tp, prev); } else if (SEQ_LT(cur->start, lastack)) { if (SEQ_LT(cur->rxmit, lastack)) { tp->sackhint.sack_bytes_rexmit -= @@ -561,14 +588,13 @@ tcp_free_sackholes(struct tcpcb *tp) struct sackhole *q; INP_LOCK_ASSERT(tp->t_inpcb); - while ((q = TAILQ_FIRST(&tp->snd_holes)) != NULL) { - TAILQ_REMOVE(&tp->snd_holes, q, scblink); - tcp_sackhole_free(tp, q); - } - tp->sackhint.nexthole = NULL; + while ((q = TAILQ_FIRST(&tp->snd_holes)) != NULL) + tcp_sackhole_remove(tp, q); tp->sackhint.sack_bytes_rexmit = 0; KASSERT(tp->snd_numholes == 0, ("tp->snd_numholes == 0")); + KASSERT(tp->sackhint.nexthole == NULL, + ("tp->sackhint.nexthole == NULL")); } /* @@ -668,7 +694,7 @@ tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt) hole = dbg_hole; } if (*sack_bytes_rexmt != dbg_bytes_rexmt) { - printf("%s: Computed sack_bytes_retransmitted (%d) not" + printf("%s: Computed sack_bytes_retransmitted (%d) not " "the same as cached value (%d)\n", __func__, dbg_bytes_rexmt, *sack_bytes_rexmt); *sack_bytes_rexmt = dbg_bytes_rexmt; diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 108c1a6684ac..d2f34ea9c088 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -191,7 +191,8 @@ struct tcpcb { /* SACK related state */ int sack_enable; /* enable SACK for this connection */ int snd_numholes; /* number of holes seen by sender */ - TAILQ_HEAD(, sackhole) snd_holes; /* SACK scoreboard (sorted) */ + TAILQ_HEAD(sackhole_head, sackhole) snd_holes; + /* SACK scoreboard (sorted) */ tcp_seq snd_fack; /* last seq number(+1) sack'd by rcv'r*/ int rcv_numsacks; /* # distinct sack blks present */ struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */