diff options
author | Florin Coras <fcoras@cisco.com> | 2019-09-06 12:56:58 -0700 |
---|---|---|
committer | John Lo <loj@cisco.com> | 2019-09-11 14:37:59 +0000 |
commit | 558e3e09577a7b49e2fec58e8ac27f3f3ae0592f (patch) | |
tree | 978f8e420d41f7819961f871f890d6c157dc6f76 | |
parent | 082ebeba166accc87619c45e7de0c29c9f00d254 (diff) |
tcp: handle sack reneging
Type: feature
Change-Id: I201155b1b92cf7e57310af726879dab039090582
Signed-off-by: Florin Coras <fcoras@cisco.com>
-rw-r--r-- | src/plugins/unittest/tcp_test.c | 120 |
-rw-r--r-- | src/vnet/tcp/tcp.c | 8 |
-rw-r--r-- | src/vnet/tcp/tcp.h | 2 |
-rwxr-xr-x | src/vnet/tcp/tcp_input.c | 208 |
4 files changed, 203 insertions, 135 deletions
diff --git a/src/plugins/unittest/tcp_test.c b/src/plugins/unittest/tcp_test.c index 7aa24562d7e..42fa423bfae 100644 --- a/src/plugins/unittest/tcp_test.c +++ b/src/plugins/unittest/tcp_test.c @@ -125,7 +125,7 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) TCP_TEST ((hole->start == 900 && hole->end == 1000), "last hole start %u end %u", hole->start, hole->end); TCP_TEST ((sb->sacked_bytes == 400), "sacked bytes %d", sb->sacked_bytes); - TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv %u", sb->snd_una_adv); + TCP_TEST ((!sb->is_reneging), "is not reneging"); TCP_TEST ((sb->last_sacked_bytes == 400), "last sacked bytes %d", sb->last_sacked_bytes); TCP_TEST ((sb->high_sacked == 900), "high sacked %u", sb->high_sacked); @@ -153,29 +153,36 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) TCP_TEST ((hole->start == 0 && hole->end == 100), "first hole start %u end %u", hole->start, hole->end); TCP_TEST ((sb->sacked_bytes == 900), "sacked bytes %d", sb->sacked_bytes); - TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv %u", sb->snd_una_adv); + TCP_TEST ((!sb->is_reneging), "is not reneging"); TCP_TEST ((sb->high_sacked == 1000), "high sacked %u", sb->high_sacked); TCP_TEST ((sb->last_sacked_bytes == 500), "last sacked bytes %d", sb->last_sacked_bytes); TCP_TEST ((sb->lost_bytes == 100), "lost bytes %u", sb->lost_bytes); /* - * Ack until byte 100, all bytes are now acked + sacked + * Ack until byte 100 - this is reneging because we should ack until 1000 */ tcp_rcv_sacks (tc, 100); if (verbose) vlib_cli_output (vm, "\nack until byte 100:\n%U", format_tcp_scoreboard, sb, tc); - TCP_TEST ((pool_elts (sb->holes) == 0), - "scoreboard has %d elements", pool_elts (sb->holes)); - TCP_TEST ((sb->snd_una_adv == 900), - "snd_una_adv after ack %u", sb->snd_una_adv); + TCP_TEST ((pool_elts (sb->holes) == 0), "scoreboard has %d elements", + pool_elts (sb->holes)); + TCP_TEST ((sb->is_reneging), "is reneging"); + + /* + * Sack all up to 1000 + */ + tc->snd_una 
= 100; + tcp_rcv_sacks (tc, 1000); TCP_TEST ((sb->high_sacked == 1000), "max sacked byte %u", sb->high_sacked); TCP_TEST ((sb->sacked_bytes == 0), "sacked bytes %d", sb->sacked_bytes); TCP_TEST ((sb->last_sacked_bytes == 0), "last sacked bytes %d", sb->last_sacked_bytes); TCP_TEST ((sb->lost_bytes == 0), "lost bytes %u", sb->lost_bytes); + TCP_TEST ((!sb->is_reneging), "is not reneging"); + /* * Add new block @@ -196,15 +203,12 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) vlib_cli_output (vm, "\nadd [1200, 1300] snd_una_max 1500, snd_una 1000:" " \n%U", format_tcp_scoreboard, sb, tc); - TCP_TEST ((sb->snd_una_adv == 0), - "snd_una_adv after ack %u", sb->snd_una_adv); + TCP_TEST ((!sb->is_reneging), "is not reneging"); TCP_TEST ((pool_elts (sb->holes) == 2), "scoreboard has %d holes", pool_elts (sb->holes)); hole = scoreboard_first_hole (sb); TCP_TEST ((hole->start == 1000 && hole->end == 1200), "first hole start %u end %u", hole->start, hole->end); - TCP_TEST ((sb->snd_una_adv == 0), - "snd_una_adv after ack %u", sb->snd_una_adv); TCP_TEST ((sb->high_sacked == 1300), "max sacked byte %u", sb->high_sacked); hole = scoreboard_last_hole (sb); TCP_TEST ((hole->start == 1300 && hole->end == 1500), @@ -217,28 +221,28 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) */ vec_reset_length (tc->rcv_opts.sacks); - tcp_rcv_sacks (tc, 1200); + /* Ack up to 1300 to avoid reneging */ + tcp_rcv_sacks (tc, 1300); if (verbose) - vlib_cli_output (vm, "\nsb ack up to byte 1200:\n%U", + vlib_cli_output (vm, "\nsb ack up to byte 1300:\n%U", format_tcp_scoreboard, sb, tc); - TCP_TEST ((sb->snd_una_adv == 100), - "snd_una_adv after ack %u", sb->snd_una_adv); TCP_TEST ((sb->sacked_bytes == 0), "sacked bytes %d", sb->sacked_bytes); - TCP_TEST ((pool_elts (sb->holes) == 0), + TCP_TEST ((pool_elts (sb->holes) == 1), "scoreboard has %d elements", pool_elts (sb->holes)); TCP_TEST ((sb->last_bytes_delivered == 100), "last bytes delivered %d", 
sb->last_bytes_delivered); TCP_TEST ((sb->lost_bytes == 0), "lost bytes %u", sb->lost_bytes); - TCP_TEST ((sb->head == TCP_INVALID_SACK_HOLE_INDEX), "head %u", sb->head); - TCP_TEST ((sb->tail == TCP_INVALID_SACK_HOLE_INDEX), "tail %u", sb->tail); + TCP_TEST ((sb->head != TCP_INVALID_SACK_HOLE_INDEX), "head %u", sb->head); + TCP_TEST ((sb->tail != TCP_INVALID_SACK_HOLE_INDEX), "tail %u", sb->tail); + TCP_TEST ((!sb->is_reneging), "is not reneging"); /* * Add some more blocks and then remove all */ vec_reset_length (tc->rcv_opts.sacks); - tc->snd_una += sb->snd_una_adv; + tc->snd_una = 1300; tc->snd_nxt = tc->snd_una_max = 1900; for (i = 0; i < 5; i++) { @@ -265,6 +269,7 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) tc->snd_una = 0; tc->snd_una_max = 1000; tc->snd_nxt = 1000; + vec_reset_length (tc->rcv_opts.sacks); for (i = 0; i < 5; i++) { vec_add1 (tc->rcv_opts.sacks, sacks[i * 2 + 1]); @@ -272,25 +277,34 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) tc->rcv_opts.n_sack_blocks = vec_len (tc->rcv_opts.sacks); tcp_rcv_sacks (tc, 0); if (verbose) - vlib_cli_output (vm, "\nsb added odd blocks snd_una 0 snd_una_max 1500:" + vlib_cli_output (vm, "\nsb added odd blocks snd_una 0 snd_una_max 1000:" "\n%U", format_tcp_scoreboard, sb, tc); TCP_TEST ((pool_elts (sb->holes) == 5), "scoreboard has %d elements", pool_elts (sb->holes)); TCP_TEST ((sb->lost_bytes == 300), "lost bytes %u", sb->lost_bytes); + hole = scoreboard_last_hole (sb); + TCP_TEST ((hole->end == 900), "last hole end %u", hole->end); + TCP_TEST ((sb->high_sacked == 1000), "high sacked %u", sb->high_sacked); + /* + * Renege bytes from 950 to 1000 + */ tcp_rcv_sacks (tc, 950); if (verbose) vlib_cli_output (vm, "\nack [0, 950]:\n%U", format_tcp_scoreboard, sb, tc); - TCP_TEST ((pool_elts (sb->holes) == 0), - "scoreboard has %d elements", pool_elts (sb->holes)); - TCP_TEST ((sb->snd_una_adv == 50), "snd_una_adv %u", sb->snd_una_adv); - TCP_TEST ((sb->sacked_bytes == 0), 
"sacked bytes %d", sb->sacked_bytes); - TCP_TEST ((sb->last_sacked_bytes == 0), - "last sacked bytes %d", sb->last_sacked_bytes); + TCP_TEST ((pool_elts (sb->holes) == 0), "scoreboard has %d elements", + pool_elts (sb->holes)); + TCP_TEST ((sb->is_reneging), "is reneging"); + TCP_TEST ((sb->sacked_bytes == 50), "sacked bytes %d", sb->sacked_bytes); + TCP_TEST ((sb->last_sacked_bytes == 0), "last sacked bytes %d", + sb->last_sacked_bytes); TCP_TEST ((sb->lost_bytes == 0), "lost bytes %u", sb->lost_bytes); + TCP_TEST ((sb->high_sacked == 1000), "high sacked %u", sb->high_sacked); + + scoreboard_clear (sb); /* * Inject one block, ack it and overlap hole @@ -317,17 +331,17 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) vlib_cli_output (vm, "\nsb ack [0, 800]:\n%U", format_tcp_scoreboard, sb, tc); - TCP_TEST ((pool_elts (sb->holes) == 0), + TCP_TEST ((pool_elts (sb->holes) == 1), "scoreboard has %d elements", pool_elts (sb->holes)); - TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv %u", sb->snd_una_adv); + TCP_TEST ((!sb->is_reneging), "is not reneging"); TCP_TEST ((sb->sacked_bytes == 0), "sacked bytes %d", sb->sacked_bytes); TCP_TEST ((sb->last_sacked_bytes == 0), "last sacked bytes %d", sb->last_sacked_bytes); TCP_TEST ((sb->last_bytes_delivered == 400), "last bytes delivered %d", sb->last_bytes_delivered); TCP_TEST ((sb->lost_bytes == 0), "lost bytes %u", sb->lost_bytes); - TCP_TEST ((sb->head == TCP_INVALID_SACK_HOLE_INDEX), "head %u", sb->head); - TCP_TEST ((sb->tail == TCP_INVALID_SACK_HOLE_INDEX), "tail %u", sb->tail); + TCP_TEST ((sb->head != TCP_INVALID_SACK_HOLE_INDEX), "head %u", sb->head); + TCP_TEST ((sb->tail != TCP_INVALID_SACK_HOLE_INDEX), "tail %u", sb->tail); /* * One hole close to head, patch head, split in two and start acking @@ -370,46 +384,66 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) sb->last_bytes_delivered); TCP_TEST ((sb->lost_bytes == 300), "lost bytes %u", sb->lost_bytes); + /* + * Ack [100 300] in two 
steps + * + * Step 1. Ack [100 200] which delivers 100 of the bytes lost + */ tc->snd_una = 100; tcp_rcv_sacks (tc, 200); + TCP_TEST ((sb->sacked_bytes == 600), "sacked bytes %d", sb->sacked_bytes); + TCP_TEST ((sb->last_bytes_delivered == 0), "last bytes delivered %d", + sb->last_bytes_delivered); + TCP_TEST ((sb->lost_bytes == 200), "lost bytes %u", sb->lost_bytes); + + /* + * Step 2. Ack up to 300, although 300 400 is sacked, so this is interpreted + * as reneging. + */ tc->snd_una = 200; tcp_rcv_sacks (tc, 300); if (verbose) - vlib_cli_output (vm, "\nacked [0, 300] in two steps:\n%U", + vlib_cli_output (vm, "\nacked [100, 300] in two steps:\n%U", format_tcp_scoreboard, sb, tc); - TCP_TEST ((sb->sacked_bytes == 500), "sacked bytes %d", sb->sacked_bytes); + TCP_TEST ((sb->sacked_bytes == 600), "sacked bytes %d", sb->sacked_bytes); TCP_TEST ((sb->lost_bytes == 100), "lost bytes %u", sb->lost_bytes); - TCP_TEST ((sb->last_bytes_delivered == 100), "last bytes delivered %d", + TCP_TEST ((sb->last_bytes_delivered == 0), "last bytes delivered %d", sb->last_bytes_delivered); + TCP_TEST ((sb->is_reneging), "is reneging"); - tc->snd_una = 400; + /* + * Ack [300 500]. 
Delivers reneged segment [300 400] and reneges bytes + * above 500 + */ + tc->snd_una = 300; tcp_rcv_sacks (tc, 500); if (verbose) vlib_cli_output (vm, "\nacked [400, 500]:\n%U", format_tcp_scoreboard, sb, tc); TCP_TEST ((pool_elts (sb->holes) == 0), "scoreboard has %d elements", pool_elts (sb->holes)); - TCP_TEST ((sb->sacked_bytes == 0), "sacked bytes %d", sb->sacked_bytes); + TCP_TEST ((sb->sacked_bytes == 500), "sacked bytes %d", sb->sacked_bytes); TCP_TEST ((sb->last_sacked_bytes == 0), "last sacked bytes %d", sb->last_sacked_bytes); - TCP_TEST ((sb->last_bytes_delivered == 500), "last bytes delivered %d", + TCP_TEST ((sb->last_bytes_delivered == 100), "last bytes delivered %d", sb->last_bytes_delivered); - TCP_TEST ((sb->lost_bytes == 0), "lost bytes %u", sb->lost_bytes); - TCP_TEST ((sb->snd_una_adv == 500), "snd_una_adv %u", sb->snd_una_adv); + TCP_TEST ((sb->is_reneging), "is reneging"); TCP_TEST ((sb->head == TCP_INVALID_SACK_HOLE_INDEX), "head %u", sb->head); TCP_TEST ((sb->tail == TCP_INVALID_SACK_HOLE_INDEX), "tail %u", sb->tail); /* - * Re-ack high sacked, to make sure last_bytes_delivered and - * snd_una_adv are 0-ed + * Ack up to 1000 to deliver all bytes */ + tc->snd_una = 500; tcp_rcv_sacks (tc, 1000); if (verbose) vlib_cli_output (vm, "\nAck high sacked:\n%U", format_tcp_scoreboard, sb, tc); - TCP_TEST ((sb->last_bytes_delivered == 0), "last bytes delivered %d", + TCP_TEST ((sb->last_sacked_bytes == 0), "last sacked bytes %d", + sb->last_sacked_bytes); + TCP_TEST ((sb->last_bytes_delivered == 500), "last bytes delivered %d", sb->last_bytes_delivered); - TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv %u", sb->snd_una_adv); + TCP_TEST ((!sb->is_reneging), "is not reneging"); /* * Add [1200, 1500] and test that [1000, 1200] is lost (bytes condition) @@ -434,7 +468,7 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) TCP_TEST ((sb->last_bytes_delivered == 0), "last bytes delivered %d", sb->last_bytes_delivered); TCP_TEST 
((sb->lost_bytes == 200), "lost bytes %u", sb->lost_bytes); - TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv %u", sb->snd_una_adv); + TCP_TEST ((!sb->is_reneging), "is not reneging"); return 0; } diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index ad9bbff5c0e..d88fc9730bb 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -268,6 +268,8 @@ tcp_connection_cleanup (tcp_connection_t * tc) tcp_cc_cleanup (tc); vec_free (tc->snd_sacks); vec_free (tc->snd_sacks_fl); + vec_free (tc->rcv_opts.sacks); + pool_free (tc->sack_sb.holes); if (tc->flags & TCP_CONN_RATE_SAMPLE) tcp_bt_cleanup (tc); @@ -1139,9 +1141,9 @@ format_tcp_scoreboard (u8 * s, va_list * args) s = format (s, "sacked_bytes %u last_sacked_bytes %u lost_bytes %u\n", sb->sacked_bytes, sb->last_sacked_bytes, sb->lost_bytes); - s = format (s, "%Ulast_bytes_delivered %u high_sacked %u snd_una_adv %u\n", + s = format (s, "%Ulast_bytes_delivered %u high_sacked %u is_reneging %u\n", format_white_space, indent, sb->last_bytes_delivered, - sb->high_sacked - tc->iss, sb->snd_una_adv); + sb->high_sacked - tc->iss, sb->is_reneging); s = format (s, "%Ucur_rxt_hole %u high_rxt %u rescue_rxt %u", format_white_space, indent, sb->cur_rxt_hole, sb->high_rxt - tc->iss, sb->rescue_rxt - tc->iss); @@ -2143,7 +2145,7 @@ tcp_scoreboard_replay (u8 * s, tcp_connection_t * tc, u8 verbose) /* Push segments */ tcp_rcv_sacks (dummy_tc, next_ack); if (has_new_ack) - dummy_tc->snd_una = next_ack + dummy_tc->sack_sb.snd_una_adv; + dummy_tc->snd_una = next_ack; if (verbose) s = format (s, "result: %U", format_tcp_scoreboard, diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 99735f2af70..4c4a9a525af 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -166,13 +166,13 @@ typedef struct _sack_scoreboard u32 sacked_bytes; /**< Number of bytes sacked in sb */ u32 last_sacked_bytes; /**< Number of bytes last sacked */ u32 last_bytes_delivered; /**< Sack bytes delivered to app */ - u32 snd_una_adv; /**< Bytes to add to 
snd_una */ u32 high_sacked; /**< Highest byte sacked (fack) */ u32 high_rxt; /**< Highest retransmitted sequence */ u32 rescue_rxt; /**< Rescue sequence number */ u32 lost_bytes; /**< Bytes lost as per RFC6675 */ u32 last_lost_bytes; /**< Number of bytes last lost */ u32 cur_rxt_hole; /**< Retransmitting from this hole */ + u8 is_reneging; #if TCP_SCOREBOARD_TRACE scoreboard_trace_elt_t *trace; diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 9c1f6309b96..7ab7020ee9b 100755 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -622,7 +622,7 @@ tcp_program_dequeue (tcp_worker_ctx_t * wrk, tcp_connection_t * tc) vec_add1 (wrk->pending_deq_acked, tc->c_c_index); tc->flags |= TCP_CONN_DEQ_PENDING; } - tc->burst_acked += tc->bytes_acked + tc->sack_sb.snd_una_adv; + tc->burst_acked += tc->bytes_acked; } /** @@ -781,51 +781,71 @@ scoreboard_insert_hole (sack_scoreboard_t * sb, u32 prev_index, #endif /* CLIB_MARCH_VARIANT */ #ifndef CLIB_MARCH_VARIANT -static void -scoreboard_update_bytes (tcp_connection_t * tc, sack_scoreboard_t * sb) + +always_inline void +scoreboard_update_bytes (sack_scoreboard_t * sb, u32 ack, u32 snd_mss) { sack_scoreboard_hole_t *left, *right; - u32 bytes = 0, blks = 0; + u32 sacked = 0, blks = 0, old_sacked; + + old_sacked = sb->sacked_bytes; sb->last_lost_bytes = 0; sb->lost_bytes = 0; sb->sacked_bytes = 0; - left = scoreboard_last_hole (sb); - if (!left) - return; - if (seq_gt (sb->high_sacked, left->end)) + right = scoreboard_last_hole (sb); + if (!right) + { + sb->sacked_bytes = sb->high_sacked - ack; + return; + } + + if (seq_gt (sb->high_sacked, right->end)) { - bytes = sb->high_sacked - left->end; + sacked = sb->high_sacked - right->end; blks = 1; } - while ((right = left) - && bytes < (TCP_DUPACK_THRESHOLD - 1) * tc->snd_mss - && blks < TCP_DUPACK_THRESHOLD - /* left not updated if above conditions fail */ - && (left = scoreboard_prev_hole (sb, right))) + while (sacked < (TCP_DUPACK_THRESHOLD - 1) * snd_mss 
+ && blks < TCP_DUPACK_THRESHOLD) { - bytes += right->start - left->end; + if (right->is_lost) + sb->lost_bytes += scoreboard_hole_bytes (right); + + left = scoreboard_prev_hole (sb, right); + if (!left) + { + ASSERT (right->start == ack || sb->is_reneging); + sacked += right->start - ack; + right = 0; + break; + } + + sacked += right->start - left->end; blks++; + right = left; } - /* left is first lost */ - if (left) + /* right is first lost */ + while (right) { - do + sb->lost_bytes += scoreboard_hole_bytes (right); + sb->last_lost_bytes += right->is_lost ? 0 : right->end - right->start; + right->is_lost = 1; + left = scoreboard_prev_hole (sb, right); + if (!left) { - sb->lost_bytes += scoreboard_hole_bytes (right); - sb->last_lost_bytes += left->is_lost ? 0 : left->end - left->start; - left->is_lost = 1; - left = scoreboard_prev_hole (sb, right); - if (left) - bytes += right->start - left->end; + ASSERT (right->start == ack || sb->is_reneging); + sacked += right->start - ack; + break; } - while ((right = left)); + sacked += right->start - left->end; + right = left; } - sb->sacked_bytes = bytes; + sb->sacked_bytes = sacked; + sb->last_sacked_bytes = sacked - (old_sacked - sb->last_bytes_delivered); } /** @@ -924,12 +944,12 @@ scoreboard_clear (sack_scoreboard_t * sb) sb->sacked_bytes = 0; sb->last_sacked_bytes = 0; sb->last_bytes_delivered = 0; - sb->snd_una_adv = 0; sb->high_sacked = 0; sb->high_rxt = 0; sb->lost_bytes = 0; sb->last_lost_bytes = 0; sb->cur_rxt_hole = TCP_INVALID_SACK_HOLE_INDEX; + sb->is_reneging = 0; } #endif /* CLIB_MARCH_VARIANT */ @@ -953,22 +973,18 @@ tcp_scoreboard_is_sane_post_recovery (tcp_connection_t * tc) void tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) { - sack_scoreboard_hole_t *hole, *next_hole, *last_hole; - u32 blk_index = 0, old_sacked_bytes, hole_index; + sack_scoreboard_hole_t *hole, *next_hole; sack_scoreboard_t *sb = &tc->sack_sb; - sack_block_t *blk, tmp; - int i, j; + sack_block_t *blk, *rcv_sacks; + u32 blk_index = 0, 
i, j; sb->last_sacked_bytes = 0; sb->last_bytes_delivered = 0; - sb->snd_una_adv = 0; if (!tcp_opts_sack (&tc->rcv_opts) && sb->head == TCP_INVALID_SACK_HOLE_INDEX) return; - old_sacked_bytes = sb->sacked_bytes; - /* Remove invalid blocks */ blk = tc->rcv_opts.sacks; while (blk < vec_end (tc->rcv_opts.sacks)) @@ -988,9 +1004,9 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) /* Add block for cumulative ack */ if (seq_gt (ack, tc->snd_una)) { - tmp.start = tc->snd_una; - tmp.end = ack; - vec_add1 (tc->rcv_opts.sacks, tmp); + vec_add2 (tc->rcv_opts.sacks, blk, 1); + blk->start = tc->snd_una; + blk->end = ack; } if (vec_len (tc->rcv_opts.sacks) == 0) @@ -999,35 +1015,60 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) tcp_scoreboard_trace_add (tc, ack); /* Make sure blocks are ordered */ - for (i = 0; i < vec_len (tc->rcv_opts.sacks); i++) - for (j = i + 1; j < vec_len (tc->rcv_opts.sacks); j++) - if (seq_lt (tc->rcv_opts.sacks[j].start, tc->rcv_opts.sacks[i].start)) + rcv_sacks = tc->rcv_opts.sacks; + for (i = 0; i < vec_len (rcv_sacks); i++) + for (j = i + 1; j < vec_len (rcv_sacks); j++) + if (seq_lt (rcv_sacks[j].start, rcv_sacks[i].start)) { - tmp = tc->rcv_opts.sacks[i]; - tc->rcv_opts.sacks[i] = tc->rcv_opts.sacks[j]; - tc->rcv_opts.sacks[j] = tmp; + sack_block_t tmp = rcv_sacks[i]; + rcv_sacks[i] = rcv_sacks[j]; + rcv_sacks[j] = tmp; } if (sb->head == TCP_INVALID_SACK_HOLE_INDEX) { - /* If no holes, insert the first that covers all outstanding bytes */ - last_hole = scoreboard_insert_hole (sb, TCP_INVALID_SACK_HOLE_INDEX, - tc->snd_una, tc->snd_nxt); - sb->tail = scoreboard_hole_index (sb, last_hole); - tmp = tc->rcv_opts.sacks[vec_len (tc->rcv_opts.sacks) - 1]; - sb->high_sacked = tmp.end; + /* Handle reneging as a special case */ + if (PREDICT_FALSE (sb->is_reneging)) + { + /* No holes, only sacked bytes */ + if (seq_leq (tc->snd_nxt, sb->high_sacked)) + { + /* No progress made so return */ + if (seq_leq (ack, tc->snd_una)) + return; + + /* Update sacked 
bytes delivered and return */ + sb->last_bytes_delivered = ack - tc->snd_una; + sb->sacked_bytes -= sb->last_bytes_delivered; + sb->is_reneging = seq_lt (ack, sb->high_sacked); + return; + } + + /* New hole above high sacked. Add it and process normally */ + hole = scoreboard_insert_hole (sb, TCP_INVALID_SACK_HOLE_INDEX, + sb->high_sacked, tc->snd_nxt); + sb->tail = scoreboard_hole_index (sb, hole); + } + /* Not reneging and no holes. Insert the first that covers all + * outstanding bytes */ + else + { + hole = scoreboard_insert_hole (sb, TCP_INVALID_SACK_HOLE_INDEX, + tc->snd_una, tc->snd_nxt); + sb->tail = scoreboard_hole_index (sb, hole); + } + sb->high_sacked = rcv_sacks[vec_len (rcv_sacks) - 1].end; } else { - /* If we have holes but snd_una_max is beyond the last hole, update - * last hole end */ - tmp = tc->rcv_opts.sacks[vec_len (tc->rcv_opts.sacks) - 1]; - last_hole = scoreboard_last_hole (sb); - if (seq_gt (tc->snd_nxt, last_hole->end)) + /* If we have holes but snd_nxt is beyond the last hole, update + * last hole end or add new hole after high sacked */ + hole = scoreboard_last_hole (sb); + if (seq_gt (tc->snd_nxt, hole->end)) { - if (seq_geq (last_hole->start, sb->high_sacked)) + if (seq_geq (hole->start, sb->high_sacked)) { - last_hole->end = tc->snd_nxt; + hole->end = tc->snd_nxt; } /* New hole after high sacked block */ else if (seq_lt (sb->high_sacked, tc->snd_nxt)) @@ -1036,17 +1077,22 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) tc->snd_nxt); } } + /* Keep track of max byte sacked for when the last hole * is acked */ - if (seq_gt (tmp.end, sb->high_sacked)) - sb->high_sacked = tmp.end; + sb->high_sacked = seq_max (rcv_sacks[vec_len (rcv_sacks) - 1].end, + sb->high_sacked); } /* Walk the holes with the SACK blocks */ hole = pool_elt_at_index (sb->holes, sb->head); - while (hole && blk_index < vec_len (tc->rcv_opts.sacks)) + + if (PREDICT_FALSE (sb->is_reneging)) + sb->last_bytes_delivered += hole->start - tc->snd_una; + + while (hole && 
blk_index < vec_len (rcv_sacks)) { - blk = &tc->rcv_opts.sacks[blk_index]; + blk = &rcv_sacks[blk_index]; if (seq_leq (blk->start, hole->start)) { /* Block covers hole. Remove hole */ @@ -1054,21 +1100,19 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) { next_hole = scoreboard_next_hole (sb, hole); - /* Byte accounting: snd_una needs to be advanced */ + /* If covered by ack, compute delivered bytes */ if (blk->end == ack) { - if (next_hole) + u32 sacked = next_hole ? next_hole->start : sb->high_sacked; + if (PREDICT_FALSE (seq_lt (ack, sacked))) { - if (seq_lt (ack, next_hole->start)) - sb->snd_una_adv = next_hole->start - ack; - sb->last_bytes_delivered += - next_hole->start - hole->end; + sb->last_bytes_delivered += ack - hole->end; + sb->is_reneging = 1; } else { - ASSERT (seq_geq (sb->high_sacked, ack)); - sb->snd_una_adv = sb->high_sacked - ack; - sb->last_bytes_delivered += sb->high_sacked - hole->end; + sb->last_bytes_delivered += sacked - hole->end; + sb->is_reneging = 0; } } scoreboard_remove_hole (sb, hole); @@ -1089,10 +1133,9 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) /* Hole must be split */ if (seq_lt (blk->end, hole->end)) { - hole_index = scoreboard_hole_index (sb, hole); + u32 hole_index = scoreboard_hole_index (sb, hole); next_hole = scoreboard_insert_hole (sb, hole_index, blk->end, hole->end); - /* Pool might've moved */ hole = scoreboard_get_hole (sb, hole_index); hole->end = blk->start; @@ -1107,24 +1150,15 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) } } - if (pool_elts (sb->holes) == 1) - { - hole = scoreboard_first_hole (sb); - if (hole->start == ack + sb->snd_una_adv && hole->end == tc->snd_nxt) - scoreboard_remove_hole (sb, hole); - } - - scoreboard_update_bytes (tc, sb); - sb->last_sacked_bytes = sb->sacked_bytes - - (old_sacked_bytes - sb->last_bytes_delivered); + scoreboard_update_bytes (sb, ack, tc->snd_mss); ASSERT (sb->last_sacked_bytes <= sb->sacked_bytes || tcp_in_recovery (tc)); ASSERT (sb->sacked_bytes == 0 || 
tcp_in_recovery (tc) - || sb->sacked_bytes < tc->snd_nxt - seq_max (tc->snd_una, ack)); + || sb->sacked_bytes <= tc->snd_nxt - seq_max (tc->snd_una, ack)); ASSERT (sb->last_sacked_bytes + sb->lost_bytes <= tc->snd_nxt - seq_max (tc->snd_una, ack) || tcp_in_recovery (tc)); ASSERT (sb->head == TCP_INVALID_SACK_HOLE_INDEX || tcp_in_recovery (tc) - || sb->holes[sb->head].start == ack + sb->snd_una_adv); + || sb->is_reneging || sb->holes[sb->head].start == ack); ASSERT (sb->last_lost_bytes <= sb->lost_bytes); TCP_EVT (TCP_EVT_CC_SCOREBOARD, tc); @@ -1464,16 +1498,14 @@ partial_ack: /* Remove retransmitted bytes that have been delivered */ if (tcp_opts_sack_permitted (&tc->rcv_opts)) { - ASSERT (tc->bytes_acked + tc->sack_sb.snd_una_adv - >= tc->sack_sb.last_bytes_delivered + ASSERT (tc->bytes_acked >= tc->sack_sb.last_bytes_delivered || (tc->flags & TCP_CONN_FINSNT)); /* If we have sacks and we haven't gotten an ack beyond high_rxt, * remove sacked bytes delivered */ if (seq_lt (tc->snd_una, tc->sack_sb.high_rxt)) { - rxt_delivered = tc->bytes_acked + tc->sack_sb.snd_una_adv - - tc->sack_sb.last_bytes_delivered; + rxt_delivered = tc->bytes_acked - tc->sack_sb.last_bytes_delivered; ASSERT (tc->snd_rxt_bytes >= rxt_delivered); tc->snd_rxt_bytes -= rxt_delivered; } @@ -1559,7 +1591,7 @@ process_ack: vnet_buffer (b)->tcp.ack_number, clib_net_to_host_u16 (th->window) << tc->snd_wscale); tc->bytes_acked = vnet_buffer (b)->tcp.ack_number - tc->snd_una; - tc->snd_una = vnet_buffer (b)->tcp.ack_number + tc->sack_sb.snd_una_adv; + tc->snd_una = vnet_buffer (b)->tcp.ack_number; tcp_validate_txf_size (tc, tc->bytes_acked); if (tc->bytes_acked) |