diff options
author | Florin Coras <fcoras@cisco.com> | 2018-11-02 12:52:10 -0700 |
---|---|---|
committer | Marco Varlese <marco.varlese@suse.de> | 2018-11-05 08:20:51 +0000 |
commit | 36ee9f1ca37daf277c2cd8d33bf16eabc15773e5 (patch) | |
tree | 14c53a845447cb77efeaea2a7dad79a3f2e6ca92 /src/vnet/tcp/tcp_output.c | |
parent | 40cca7585d969499e92b98c32956bbe3f2050e4e (diff) |
tcp: send unsent data in fast recovery
Allows sending of unsent data in fast recovery and consolidates logic in
tcp, instead of splitting it between tcp fast retransmit and tcp output
path called by the session layer.
Change-Id: I9b12cdf2aa2ac50b9f25e46856fed037163501fe
Signed-off-by: Florin Coras <fcoras@cisco.com>
Diffstat (limited to 'src/vnet/tcp/tcp_output.c')
-rw-r--r-- | src/vnet/tcp/tcp_output.c | 231 |
1 files changed, 156 insertions, 75 deletions
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 78f7c3f8294..b15cf9b362b 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -1240,49 +1240,27 @@ tcp_timer_delack_handler (u32 index) } /** - * Build a retransmit segment + * Allocate a new buffer and build a new tcp segment * - * @return the number of bytes in the segment or 0 if there's nothing to - * retransmit + * @param wrk tcp worker + * @param tc connection for which the segment will be allocated + * @param offset offset of the first byte in the tx fifo + * @param max_deq_byte segment size + * @param[out] b pointer to buffer allocated + * + * @return the number of bytes in the segment or 0 if buffer cannot be + * allocated or no data available */ -static u32 -tcp_prepare_retransmit_segment (tcp_worker_ctx_t * wrk, - tcp_connection_t * tc, u32 offset, - u32 max_deq_bytes, vlib_buffer_t ** b) +static int +tcp_prepare_segment (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, + u32 offset, u32 max_deq_bytes, vlib_buffer_t ** b) { u32 bytes_per_buffer = vnet_get_tcp_main ()->bytes_per_buffer; + u32 bi, seg_size; vlib_main_t *vm = wrk->vm; int n_bytes = 0; - u32 start, bi, available_bytes, seg_size; u8 *data; - ASSERT (tc->state >= TCP_STATE_ESTABLISHED); - ASSERT (max_deq_bytes != 0); - - /* - * Make sure we can retransmit something - */ - available_bytes = session_tx_fifo_max_dequeue (&tc->connection); - ASSERT (available_bytes >= offset); - available_bytes -= offset; - if (!available_bytes) - return 0; - max_deq_bytes = clib_min (tc->snd_mss, max_deq_bytes); - max_deq_bytes = clib_min (available_bytes, max_deq_bytes); - - /* Start is beyond snd_congestion */ - start = tc->snd_una + offset; - if (seq_geq (start, tc->snd_congestion)) - goto done; - - /* Don't overshoot snd_congestion */ - if (seq_gt (start + max_deq_bytes, tc->snd_congestion)) - { - max_deq_bytes = tc->snd_congestion - start; - if (max_deq_bytes == 0) - goto done; - } - seg_size = max_deq_bytes + MAX_HDRS_LEN; /* @@ -1374,6 +1352,55 @@ tcp_prepare_retransmit_segment (tcp_worker_ctx_t * wrk, ASSERT (n_bytes > 0); ASSERT (((*b)->current_data + (*b)->current_length) <= bytes_per_buffer); + return n_bytes; +} + +/** + * Build a retransmit segment + * + * @return the number of bytes in the segment or 0 if there's nothing to + * retransmit + */ +static u32 +tcp_prepare_retransmit_segment (tcp_worker_ctx_t * wrk, + tcp_connection_t * tc, u32 offset, + u32 max_deq_bytes, vlib_buffer_t ** b) +{ + u32 start, available_bytes; + int n_bytes = 0; + + ASSERT (tc->state >= TCP_STATE_ESTABLISHED); + ASSERT (max_deq_bytes != 0); + + /* + * Make sure we can retransmit something + */ + available_bytes = session_tx_fifo_max_dequeue (&tc->connection); + ASSERT (available_bytes >= offset); + available_bytes -= offset; + if (!available_bytes) + return 0; + + max_deq_bytes = clib_min (tc->snd_mss, max_deq_bytes); + max_deq_bytes = clib_min (available_bytes, max_deq_bytes); + + /* Start is beyond snd_congestion */ + start = tc->snd_una + offset; + if (seq_geq (start, tc->snd_congestion)) + goto done; + + /* Don't overshoot snd_congestion */ + if (seq_gt (start + max_deq_bytes, tc->snd_congestion)) + { + max_deq_bytes = tc->snd_congestion - start; + if (max_deq_bytes == 0) + goto done; + } + + n_bytes = tcp_prepare_segment (wrk, tc, offset, max_deq_bytes, b); + if (!n_bytes) + return 0; + if (tcp_in_fastrecovery (tc)) tc->snd_rxt_bytes += n_bytes; @@ -1696,6 +1723,36 @@ tcp_retransmit_first_unacked (tcp_worker_ctx_t * wrk, tcp_connection_t * tc) return 0; } +static int +tcp_fast_retransmit_unsent (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, + u32 burst_size) +{ + u32 offset, n_segs = 0, n_written, bi; + vlib_main_t *vm = wrk->vm; + vlib_buffer_t *b = 0; + + tc->snd_nxt = tc->snd_una_max; + offset = tc->snd_una_max - tc->snd_una; + while (n_segs < burst_size) + { + n_written = tcp_prepare_segment (wrk, tc, offset, tc->snd_mss, &b); + if (!n_written) + goto done; + + bi = vlib_get_buffer_index (vm, b); + tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4); + offset += n_written; + n_segs += 1; + } + +done: + return n_segs; +} + +#define scoreboard_rescue_rxt_valid(_sb, _tc) \ + (seq_geq (_sb->rescue_rxt, _tc->snd_una) \ + && seq_leq (_sb->rescue_rxt, _tc->snd_congestion)) + /** * Do fast retransmit with SACKs */ @@ -1703,55 +1760,54 @@ int tcp_fast_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, u32 burst_size) { + u32 n_written = 0, offset, max_bytes, n_segs = 0, n_segs_now; + sack_scoreboard_hole_t *hole; vlib_main_t *vm = wrk->vm; - u32 n_written = 0, offset, max_bytes, n_segs = 0; vlib_buffer_t *b = 0; - sack_scoreboard_hole_t *hole; sack_scoreboard_t *sb; u32 bi, old_snd_nxt; int snd_space; + u32 max_deq; u8 snd_limited = 0, can_rescue = 0; ASSERT (tcp_in_fastrecovery (tc)); - old_snd_nxt = tc->snd_nxt; - sb = &tc->sack_sb; snd_space = tcp_available_cc_snd_space (tc); - hole = scoreboard_get_hole (sb, sb->cur_rxt_hole); - if (snd_space < tc->snd_mss) { tcp_program_fastretransmit (wrk, tc); - goto done; + return 0; } TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 0); + old_snd_nxt = tc->snd_nxt; + sb = &tc->sack_sb; + hole = scoreboard_get_hole (sb, sb->cur_rxt_hole); + + max_deq = session_tx_fifo_max_dequeue (&tc->connection); + max_deq -= tc->snd_una_max - tc->snd_una; + while (snd_space > 0 && n_segs < burst_size) { - hole = scoreboard_next_rxt_hole (sb, hole, - tcp_fastrecovery_sent_1_smss (tc), - &can_rescue, &snd_limited); + hole = scoreboard_next_rxt_hole (sb, hole, max_deq, &can_rescue, + &snd_limited); if (!hole) { - if (!can_rescue || !(seq_lt (sb->rescue_rxt, tc->snd_una) - || seq_gt (sb->rescue_rxt, - tc->snd_congestion))) + if (max_deq) { - if (tcp_fastrecovery_first (tc)) - break; - - /* We tend to lose the first segment. Try re-resending - * it but only once and after we've tried everything */ - hole = scoreboard_first_hole (sb); - if (hole && hole->start == tc->snd_una) - { - tcp_retransmit_first_unacked (wrk, tc); - tcp_fastrecovery_first_on (tc); - n_segs += 1; - } - break; + snd_space = clib_min (max_deq, snd_space); + burst_size = clib_min (burst_size - n_segs, + snd_space / tc->snd_mss); + n_segs_now = tcp_fast_retransmit_unsent (wrk, tc, burst_size); + if (max_deq > n_segs_now * tc->snd_mss) + tcp_program_fastretransmit (wrk, tc); + n_segs += n_segs_now; + goto done; } + if (!can_rescue || scoreboard_rescue_rxt_valid (sb, tc)) + break; + /* If rescue rxt undefined or less than snd_una then one segment of * up to SMSS octets that MUST include the highest outstanding * unSACKed sequence number SHOULD be returned, and RescueRxt set to @@ -1778,19 +1834,21 @@ tcp_fast_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, max_bytes = snd_limited ? clib_min (max_bytes, tc->snd_mss) : max_bytes; if (max_bytes == 0) break; + offset = sb->high_rxt - tc->snd_una; tc->snd_nxt = sb->high_rxt; n_written = tcp_prepare_retransmit_segment (wrk, tc, offset, max_bytes, &b); + ASSERT (n_written <= snd_space); /* Nothing left to retransmit */ if (n_written == 0) break; bi = vlib_get_buffer_index (vm, b); - sb->high_rxt += n_written; tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4); - ASSERT (n_written <= snd_space); + + sb->high_rxt += n_written; snd_space -= n_written; n_segs += 1; } @@ -1811,24 +1869,26 @@ int tcp_fast_retransmit_no_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, u32 burst_size) { - u32 n_written = 0, offset = 0, bi, old_snd_nxt; + u32 n_written = 0, offset = 0, bi, old_snd_nxt, max_deq, n_segs_now; vlib_main_t *vm = wrk->vm; int snd_space, n_segs = 0; vlib_buffer_t *b; ASSERT (tcp_in_fastrecovery (tc)); TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 0); - - /* Start resending from first un-acked segment */ old_snd_nxt = tc->snd_nxt; - tc->snd_nxt = tc->snd_una; - snd_space = tcp_available_cc_snd_space (tc); + if (!tcp_fastrecovery_first (tc)) + goto send_unsent; + + /* RFC 6582: [If a partial ack], retransmit the first unacknowledged + * segment. */ + snd_space = tc->sack_sb.last_bytes_delivered; + tc->snd_nxt = tc->snd_una; while (snd_space > 0 && n_segs < burst_size) { - offset += n_written; - n_written = tcp_prepare_retransmit_segment (wrk, tc, offset, snd_space, - &b); + n_written = tcp_prepare_retransmit_segment (wrk, tc, offset, + tc->snd_mss, &b); /* Nothing left to retransmit */ if (n_written == 0) @@ -1837,16 +1897,37 @@ tcp_fast_retransmit_no_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, bi = vlib_get_buffer_index (vm, b); tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4); snd_space -= n_written; + offset += n_written; n_segs += 1; } - /* More data to resend */ - if (seq_lt (tc->snd_nxt, tc->snd_congestion)) - tcp_program_fastretransmit (wrk, tc); + if (n_segs == burst_size) + goto done; + +send_unsent: - /* Restore snd_nxt. If window allows, send 1 SMSS of new data */ + /* RFC 6582: Send a new segment if permitted by the new value of cwnd. */ + snd_space = tcp_available_cc_snd_space (tc); + if (snd_space < tc->snd_mss) + goto done; + + max_deq = session_tx_fifo_max_dequeue (&tc->connection); + max_deq -= tc->snd_una_max - tc->snd_una; + if (max_deq) + { + snd_space = clib_min (max_deq, snd_space); + burst_size = clib_min (burst_size - n_segs, snd_space / tc->snd_mss); + n_segs_now = tcp_fast_retransmit_unsent (wrk, tc, burst_size); + if (max_deq > n_segs_now * tc->snd_mss) + tcp_program_fastretransmit (wrk, tc); + n_segs += n_segs_now; + } + + /* Restore snd_nxt */ tc->snd_nxt = old_snd_nxt; +done: + tcp_fastrecovery_first_off (tc); return n_segs; } |