diff options
author | Florin Coras <fcoras@cisco.com> | 2018-10-31 23:09:22 -0700 |
---|---|---|
committer | Dave Barach <openvpp@barachs.net> | 2018-11-01 20:43:29 +0000 |
commit | e55a6d7a97044c2f4fd0231242e062924d75c7b6 (patch) | |
tree | ae58b4562748db8397604029261008b94aaecd8c /src/vnet/tcp | |
parent | f6c68d74eca9038516986254846de60a4f7c02ae (diff) |
tcp: fast retransmit pacing
Force pacing for fast retransmit to avoid bursts of retransmitted
packets.
Change-Id: I2ff42c328899b36322c4de557b1f7d853dba8fe2
Signed-off-by: Florin Coras <fcoras@cisco.com>
Diffstat (limited to 'src/vnet/tcp')
-rw-r--r-- | src/vnet/tcp/tcp.c | 16 | ||||
-rw-r--r-- | src/vnet/tcp/tcp.h | 7 | ||||
-rwxr-xr-x | src/vnet/tcp/tcp_debug.h | 18 | ||||
-rw-r--r-- | src/vnet/tcp/tcp_input.c | 87 | ||||
-rw-r--r-- | src/vnet/tcp/tcp_output.c | 10 |
5 files changed, 84 insertions, 54 deletions
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 8c0c5da07a1..bdf9c7af608 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -558,10 +558,11 @@ tcp_init_snd_vars (tcp_connection_t * tc) void tcp_enable_pacing (tcp_connection_t * tc) { - u32 max_burst, byte_rate; - max_burst = 16 * tc->snd_mss; + u32 initial_bucket, byte_rate; + initial_bucket = 16 * tc->snd_mss; byte_rate = 2 << 16; - transport_connection_tx_pacer_init (&tc->connection, byte_rate, max_burst); + transport_connection_tx_pacer_init (&tc->connection, byte_rate, + initial_bucket); tc->mrtt_us = (u32) ~ 0; } @@ -1318,6 +1319,7 @@ tcp_main_enable (vlib_main_t * vm) num_threads = 1 /* main thread */ + vtm->n_threads; vec_validate (tm->connections, num_threads - 1); + vec_validate (tm->wrk_ctx, num_threads - 1); /* * Preallocate connections. Assume that thread 0 won't @@ -1339,6 +1341,13 @@ tcp_main_enable (vlib_main_t * vm) if (preallocated_connections_per_thread) pool_init_fixed (tm->connections[thread], preallocated_connections_per_thread); + vec_validate (tm->wrk_ctx[thread].pending_fast_rxt, 0); + vec_validate (tm->wrk_ctx[thread].ongoing_fast_rxt, 0); + vec_validate (tm->wrk_ctx[thread].postponed_fast_rxt, 0); + vec_reset_length (tm->wrk_ctx[thread].pending_fast_rxt); + vec_reset_length (tm->wrk_ctx[thread].ongoing_fast_rxt); + vec_reset_length (tm->wrk_ctx[thread].postponed_fast_rxt); + tm->wrk_ctx[thread].vm = vlib_mains[thread]; } /* @@ -1358,7 +1367,6 @@ tcp_main_enable (vlib_main_t * vm) clib_spinlock_init (&tm->half_open_lock); } - vec_validate (tm->wrk_ctx, num_threads - 1); tcp_initialize_timer_wheels (tm); tm->bytes_per_buffer = vlib_buffer_free_list_buffer_size diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 4ba3d5e9d87..f7424c3202e 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -160,7 +160,7 @@ enum }; #define TCP_SCOREBOARD_TRACE (0) -#define TCP_MAX_SACK_BLOCKS 32 /**< Max number of SACK blocks stored */ +#define TCP_MAX_SACK_BLOCKS 256 /**< Max number of SACK blocks stored */ #define TCP_INVALID_SACK_HOLE_INDEX ((u32)~0) typedef struct _scoreboard_trace_elt @@ -390,6 +390,9 @@ typedef struct tcp_worker_ctx_ needing fast rxt */ u32 *ongoing_fast_rxt; /**< vector of connections now doing fast rxt */ + u32 *postponed_fast_rxt; /**< vector of connections + that will do fast rxt */ + vlib_main_t *vm; /**< pointer to vm */ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); u8 cached_opts[40]; /**< cached 'on the wire' @@ -722,8 +725,8 @@ always_inline void tcp_cc_rcv_ack (tcp_connection_t * tc) { tc->cc_algo->rcv_ack (tc); - tcp_update_pacer (tc); tc->tsecr_last_ack = tc->rcv_opts.tsecr; + tcp_update_pacer (tc); } always_inline void diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h index 8f626b1afeb..cd4a6f04d6e 100755 --- a/src/vnet/tcp/tcp_debug.h +++ b/src/vnet/tcp/tcp_debug.h @@ -627,10 +627,8 @@ if (_av > 0) \ #if TCP_DEBUG_CC -#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...) \ +#define TCP_EVT_CC_EVT_PRINT(_tc, _sub_evt) \ { \ - if (_tc->snd_una != _tc->iss) \ - TCP_EVT_CC_STAT_PRINT (_tc); \ ELOG_TYPE_DECLARE (_e) = \ { \ .format = "cc: %s snd_space %u snd_una %u out %u flight %u", \ @@ -638,8 +636,8 @@ if (_av > 0) \ .n_enum_strings = 7, \ .enum_strings = { \ "fast-rxt", \ - "rxt-timeout", \ "first-rxt", \ + "rxt-timeout", \ "recovered", \ "congestion", \ "undo", \ @@ -653,8 +651,18 @@ if (_av > 0) \ ed->data[3] = tcp_bytes_out(_tc); \ ed->data[4] = tcp_flight_size (_tc); \ } + +#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...) \ +{ \ + if (_tc->snd_una != _tc->iss) \ + TCP_EVT_CC_STAT_PRINT (_tc); \ + if ((_sub_evt <= 1 && TCP_DEBUG_CC > 1) \ + || (_sub_evt > 1 && TCP_DEBUG_CC > 0)) \ + TCP_EVT_CC_EVT_PRINT (_tc, _sub_evt); \ +} #else -#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...) +#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...) \ + #endif #if TCP_DEBUG_CC > 1 diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index ac0e996567e..154b9ac8363 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -1199,67 +1199,60 @@ void tcp_do_fastretransmits (u32 thread_index) { tcp_worker_ctx_t *wrk = &tcp_main.wrk_ctx[thread_index]; - u32 max_burst_size, burst_size, n_segs = 0; + u32 max_burst_size, burst_size, n_segs = 0, n_segs_now; + u32 *ongoing_fast_rxt, burst_bytes, sent_bytes; tcp_connection_t *tc; + u64 last_cpu_time; int i; - if (vec_len (wrk->pending_fast_rxt) == 0) + if (vec_len (wrk->pending_fast_rxt) == 0 + && vec_len (wrk->postponed_fast_rxt) == 0) return; - vec_append (wrk->ongoing_fast_rxt, wrk->pending_fast_rxt); - vec_reset_length (wrk->pending_fast_rxt); + last_cpu_time = wrk->vm->clib_time.last_cpu_time; + ongoing_fast_rxt = wrk->ongoing_fast_rxt; + vec_append (ongoing_fast_rxt, wrk->postponed_fast_rxt); + vec_append (ongoing_fast_rxt, wrk->pending_fast_rxt); + + _vec_len (wrk->postponed_fast_rxt) = 0; + _vec_len (wrk->pending_fast_rxt) = 0; max_burst_size = VLIB_FRAME_SIZE / vec_len (wrk->ongoing_fast_rxt); max_burst_size = clib_max (max_burst_size, 1); - for (i = 0; i < vec_len (wrk->ongoing_fast_rxt); i++) + for (i = 0; i < vec_len (ongoing_fast_rxt); i++) { - tc = tcp_connection_get (wrk->ongoing_fast_rxt[i], thread_index); + if (n_segs >= VLIB_FRAME_SIZE) + { + vec_add1 (wrk->postponed_fast_rxt, ongoing_fast_rxt[i]); + continue; + } + + tc = tcp_connection_get (ongoing_fast_rxt[i], thread_index); tc->flags &= ~TCP_CONN_FRXT_PENDING; if (!tcp_in_fastrecovery (tc)) continue; - /* TODO tx pacer instead of this */ - if (n_segs >= VLIB_FRAME_SIZE) + burst_size = clib_min (max_burst_size, VLIB_FRAME_SIZE - n_segs); + burst_bytes = transport_connection_tx_pacer_burst (&tc->connection, + last_cpu_time); + burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss); + if (!burst_size) { tcp_program_fastretransmit (tc); continue; } - burst_size = clib_min (max_burst_size, VLIB_FRAME_SIZE - n_segs); + n_segs_now = tcp_fast_retransmit (tc, burst_size); + sent_bytes = clib_min (n_segs_now * tc->snd_mss, burst_bytes); + transport_connection_tx_pacer_update_bytes (&tc->connection, + sent_bytes); - if (tc->cwnd > tc->ssthresh + 3 * tc->snd_mss) - { - /* The first segment MUST be retransmitted */ - if (tcp_retransmit_first_unacked (tc)) - { - tcp_program_fastretransmit (tc); - continue; - } - - /* Post retransmit update cwnd to ssthresh and account for the - * three segments that have left the network and should've been - * buffered at the receiver XXX */ - tc->cwnd = tc->ssthresh + 3 * tc->snd_mss; - - /* If cwnd allows, send more data */ - if (tcp_opts_sack_permitted (&tc->rcv_opts)) - { - scoreboard_init_high_rxt (&tc->sack_sb, - tc->snd_una + tc->snd_mss); - tc->sack_sb.rescue_rxt = tc->snd_una - 1; - n_segs += tcp_fast_retransmit_sack (tc, burst_size); - } - else - { - n_segs += tcp_fast_retransmit_no_sack (tc, burst_size); - } - } - else - n_segs += tcp_fast_retransmit (tc, burst_size); + n_segs += n_segs_now; } - vec_reset_length (wrk->ongoing_fast_rxt); + _vec_len (ongoing_fast_rxt) = 0; + wrk->ongoing_fast_rxt = ongoing_fast_rxt; } /** @@ -1298,6 +1291,7 @@ tcp_cc_handle_event (tcp_connection_t * tc, u32 is_dack) } else if (tcp_should_fastrecover (tc)) { + u32 byte_rate; ASSERT (!tcp_in_fastrecovery (tc)); /* Heuristic to catch potential late dupacks @@ -1313,8 +1307,21 @@ tcp_cc_handle_event (tcp_connection_t * tc, u32 is_dack) tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK); if (tcp_opts_sack_permitted (&tc->rcv_opts)) - tc->sack_sb.high_rxt = tc->snd_una; + { + tc->cwnd = tc->ssthresh; + scoreboard_init_high_rxt (&tc->sack_sb, tc->snd_una); + tc->sack_sb.rescue_rxt = tc->snd_una - 1; + } + else + { + /* Post retransmit update cwnd to ssthresh and account for the + * three segments that have left the network and should've been + * buffered at the receiver XXX */ + tc->cwnd = tc->ssthresh + 3 * tc->snd_mss; + } + byte_rate = (0.3 * tc->cwnd) / ((f64) TCP_TICK * tc->srtt); + transport_connection_tx_pacer_init (&tc->connection, byte_rate, 0); tcp_program_fastretransmit (tc); return; } diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index f14a61263d4..81579ef96a2 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -1452,10 +1452,10 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) tc->timers[TCP_TIMER_RETRANSMIT] = TCP_TIMER_HANDLE_INVALID; } - TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 1); - if (tc->state >= TCP_STATE_ESTABLISHED) { + TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 2); + /* Lost FIN, retransmit and return */ if (tcp_is_lost_fin (tc)) { @@ -1536,6 +1536,8 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) return; } + TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 2); + /* Try without increasing RTO a number of times. If this fails, * start growing RTO exponentially */ tc->rto_boff += 1; @@ -1562,6 +1564,8 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) /* Retransmit SYN-ACK */ else if (tc->state == TCP_STATE_SYN_RCVD) { + TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 2); + tc->rto_boff += 1; if (tc->rto_boff > TCP_RTO_SYN_RETRIES) tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); @@ -1693,7 +1697,7 @@ tcp_retransmit_first_unacked (tcp_connection_t * tc) old_snd_nxt = tc->snd_nxt; tc->snd_nxt = tc->snd_una; - TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 2); + TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 1); n_bytes = tcp_prepare_retransmit_segment (tc, 0, tc->snd_mss, &b); if (!n_bytes) |