diff options
author | Florin Coras <fcoras@cisco.com> | 2018-11-07 12:49:19 -0800 |
---|---|---|
committer | Damjan Marion <dmarion@me.com> | 2018-11-08 11:20:29 +0000 |
commit | efefc6b4b219e2897e48def83352b4df52bc03a0 (patch) | |
tree | 50e7a57901e34365c1c4f5dc8868c705b192d864 /src/vnet/tcp | |
parent | 221d6f131d644b2d14f741c4b3031f53b8a8ff03 (diff) |
tcp: pacer and mrtt estimation improvements
- update the pacer once per burst
- improve the initial rtt estimate
- compute a smoothed average of the measured rtt for a higher-precision rtt estimate
Change-Id: I06d41a98784cdf861bedfbee2e7d0afc0d0154ef
Signed-off-by: Florin Coras <fcoras@cisco.com>
Diffstat (limited to 'src/vnet/tcp')
-rw-r--r-- | src/vnet/tcp/tcp.c | 7 | ||||
-rw-r--r-- | src/vnet/tcp/tcp.h | 1 | ||||
-rwxr-xr-x | src/vnet/tcp/tcp_debug.h | 9 | ||||
-rw-r--r-- | src/vnet/tcp/tcp_input.c | 62 | ||||
-rw-r--r-- | src/vnet/tcp/tcp_output.c | 2 |
5 files changed, 61 insertions, 20 deletions
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index ea350dddc69..d759cf0d0cd 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -802,9 +802,10 @@ format_tcp_vars (u8 * s, va_list * args) tcp_rcv_wnd_available (tc)); s = format (s, " tsval_recent %u tsval_recent_age %u\n", tc->tsval_recent, tcp_time_now () - tc->tsval_recent_age); - s = format (s, " rto %u rto_boff %u srtt %u rttvar %u rtt_ts %2.5f ", - tc->rto, tc->rto_boff, tc->srtt, tc->rttvar, tc->rtt_ts); - s = format (s, "rtt_seq %u\n", tc->rtt_seq - tc->iss); + s = format (s, " rto %u rto_boff %u srtt %u us %.3f rttvar %u rtt_ts %x", + tc->rto, tc->rto_boff, tc->srtt, tc->mrtt_us * 1000, tc->rttvar, + tc->rtt_ts); + s = format (s, " rtt_seq %u\n", tc->rtt_seq - tc->iss); s = format (s, " cong: %U", format_tcp_congestion, tc); if (tc->state >= TCP_STATE_ESTABLISHED) diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 5a3a96570d2..843b90d987e 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -753,7 +753,6 @@ tcp_cc_rcv_ack (tcp_connection_t * tc) { tc->cc_algo->rcv_ack (tc); tc->tsecr_last_ack = tc->rcv_opts.tsecr; - tcp_connection_tx_pacer_update (tc); } always_inline void diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h index cd4a6f04d6e..d125ee84612 100755 --- a/src/vnet/tcp/tcp_debug.h +++ b/src/vnet/tcp/tcp_debug.h @@ -806,13 +806,14 @@ if (TCP_DEBUG_CC > 1) \ { \ ELOG_TYPE_DECLARE (_e) = \ { \ - .format = "rcv_stat: rto %u srtt %u rttvar %u ", \ - .format_args = "i4i4i4", \ + .format = "rcv_stat: rto %u srtt %u mrtt-us %u rttvar %u", \ + .format_args = "i4i4i4i4", \ }; \ - DECLARE_ETD(_tc, _e, 3); \ + DECLARE_ETD(_tc, _e, 4); \ ed->data[0] = _tc->rto; \ ed->data[1] = _tc->srtt; \ - ed->data[2] = _tc->rttvar; \ + ed->data[2] = (u32) (_tc->mrtt_us * 1e6); \ + ed->data[3] = _tc->rttvar; \ } #define TCP_EVT_CC_RTO_STAT_HANDLER(_tc, ...) 
\ diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 9c303eb01a5..0f1ab1ab3b0 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -455,8 +455,11 @@ tcp_update_rtt (tcp_connection_t * tc, u32 ack) if (tc->rtt_ts && seq_geq (ack, tc->rtt_seq)) { - tc->mrtt_us = tcp_time_now_us (tc->c_thread_index) - tc->rtt_ts; - mrtt = clib_max ((u32) (tc->mrtt_us * THZ), 1); + f64 sample = tcp_time_now_us (tc->c_thread_index) - tc->rtt_ts; + tc->mrtt_us = tc->mrtt_us + (sample - tc->mrtt_us) * 0.125; + mrtt = clib_max ((u32) (sample * THZ), 1); + /* Allow measuring of a new RTT */ + tc->rtt_ts = 0; } /* As per RFC7323 TSecr can be used for RTTM only if the segment advances * snd_una, i.e., the left side of the send window: @@ -475,9 +478,6 @@ tcp_update_rtt (tcp_connection_t * tc, u32 ack) done: - /* Allow measuring of a new RTT */ - tc->rtt_ts = 0; - /* If we got here something must've been ACKed so make sure boff is 0, * even if mrtt is not valid since we update the rto lower */ tc->rto_boff = 0; @@ -486,6 +486,29 @@ done: return 0; } +static void +tcp_estimate_initial_rtt (tcp_connection_t * tc) +{ + u8 thread_index = vlib_num_workers ()? 
1 : 0; + int mrtt; + + if (tc->rtt_ts) + { + tc->mrtt_us = tcp_time_now_us (thread_index) - tc->rtt_ts; + mrtt = clib_max ((u32) (tc->mrtt_us * THZ), 1); + tc->rtt_ts = 0; + } + else + { + mrtt = tcp_time_now_w_thread (thread_index) - tc->rcv_opts.tsecr; + tc->mrtt_us = (f64) mrtt *TCP_TICK; + + } + + if (mrtt > 0 && mrtt < TCP_RTT_MAX) + tcp_estimate_rtt (tc, mrtt); +} + /** * Dequeue bytes for connections that have received acks in last burst */ @@ -506,6 +529,9 @@ tcp_handle_postponed_dequeues (tcp_worker_ctx_t * wrk) tc = tcp_connection_get (pending_deq_acked[i], thread_index); tc->flags &= ~TCP_CONN_DEQ_PENDING; + if (PREDICT_FALSE (!tc->burst_acked)) + continue; + /* Dequeue the newly ACKed bytes */ stream_session_dequeue_drop (&tc->connection, tc->burst_acked); tc->burst_acked = 0; @@ -514,6 +540,11 @@ tcp_handle_postponed_dequeues (tcp_worker_ctx_t * wrk) /* If everything has been acked, stop retransmit timer * otherwise update. */ tcp_retransmit_timer_update (tc); + + /* If not congested, update pacer based on our new + * cwnd estimate */ + if (!tcp_in_fastrecovery (tc)) + tcp_connection_tx_pacer_update (tc); } _vec_len (wrk->pending_deq_acked) = 0; } @@ -1084,6 +1115,7 @@ tcp_cc_recovery_exit (tcp_connection_t * tc) tcp_update_rto (tc); tc->snd_rxt_ts = 0; tc->snd_nxt = tc->snd_una_max; + tc->rtt_ts = 0; tcp_recovery_off (tc); TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3); } @@ -1096,6 +1128,7 @@ tcp_cc_fastrecovery_exit (tcp_connection_t * tc) tc->rcv_dupacks = 0; tc->snd_nxt = tc->snd_una_max; tc->snd_rxt_bytes = 0; + tc->rtt_ts = 0; tcp_fastrecovery_off (tc); tcp_fastrecovery_1_smss_off (tc); @@ -1381,6 +1414,10 @@ partial_ack: * Legitimate ACK. 1) See if we can exit recovery */ + /* Update the pacing rate. 
For the first partial ack we move from + * the artificially constrained rate to the one after congestion */ + tcp_connection_tx_pacer_update (tc); + if (seq_geq (tc->snd_una, tc->snd_congestion)) { tcp_retransmit_timer_update (tc); @@ -1403,10 +1440,6 @@ partial_ack: * Legitimate ACK. 2) If PARTIAL ACK try to retransmit */ - /* Update the pacing rate. For the first partial ack we move from - * the artificially constrained rate to the one after congestion */ - tcp_connection_tx_pacer_update (tc); - /* XXX limit this only to first partial ack? */ tcp_retransmit_timer_force_update (tc); @@ -2427,7 +2460,7 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } /* Update rtt with the syn-ack sample */ - tcp_update_rtt (new_tc0, vnet_buffer (b0)->tcp.ack_number); + tcp_estimate_initial_rtt (new_tc0); TCP_EVT_DBG (TCP_EVT_SYNACK_RCVD, new_tc0); error0 = TCP_ERROR_SYN_ACKS_RCVD; } @@ -2636,7 +2669,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } /* Update rtt and rto */ - tcp_update_rtt (tc0, vnet_buffer (b0)->tcp.ack_number); + tcp_estimate_initial_rtt (tc0); /* Switch state to ESTABLISHED */ tc0->state = TCP_STATE_ESTABLISHED; @@ -2687,6 +2720,12 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, * wait for peer's FIN but not indefinitely. */ tcp_connection_timers_reset (tc0); tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); + + /* Don't try to deq the FIN acked */ + if (tc0->burst_acked > 1) + stream_session_dequeue_drop (&tc0->connection, + tc0->burst_acked - 1); + tc0->burst_acked = 0; } break; case TCP_STATE_FIN_WAIT_2: @@ -2695,6 +2734,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, * acknowledged ("ok") but do not delete the TCB. */ if (tcp_rcv_ack (wrk, tc0, b0, tcp0, &error0)) goto drop; + tc0->burst_acked = 0; break; case TCP_STATE_CLOSE_WAIT: /* Do the same processing as for the ESTABLISHED state. 
*/ diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 089f85a0ea0..192e820e648 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -1000,7 +1000,7 @@ tcp_send_syn (tcp_connection_t * tc) tcp_make_syn (tc, b); /* Measure RTT with this */ - tc->rtt_ts = tcp_time_now (); + tc->rtt_ts = tcp_time_now_us (vlib_num_workers ()? 1 : 0); tc->rtt_seq = tc->snd_nxt; tc->rto_boff = 0; |