diff options
author | Florin Coras <fcoras@cisco.com> | 2018-11-05 15:57:21 -0800 |
---|---|---|
committer | Damjan Marion <dmarion@me.com> | 2018-11-07 13:26:12 +0000 |
commit | 7ac053b27fee8f9e437cf7b61357943356381061 (patch) | |
tree | f0a844206701bef79107626312e5e31e383423c1 /src/vnet/tcp/tcp.c | |
parent | f5942d5612d99c5ea1189cb9f8de6b6097b0456e (diff) |
tcp: consume incoming buffers instead of reusing
Instead of reusing buffers for acking, consume all buffers and program
output for (dup)ack generation. This implicitly fixes the drop counters
that were artificially inflated by both data and feedback traffic.
Moreover, the patch also significantly reduces the ack traffic as we now
only generate an ack per frame, unless duplicate acks need to be sent.
Because of the reduced feedback traffic, a sender's rx path and a
receiver's tx path are now significantly less loaded. In particular, a
sender can overwhelm a 40Gbps NIC and generate tx drop bursts for low
rtts. Consequently, tx pacing is now enforced by default.
Change-Id: I619c29a8945bf26c093f8f9e197e3c6d5d43868e
Signed-off-by: Florin Coras <fcoras@cisco.com>
Diffstat (limited to 'src/vnet/tcp/tcp.c')
-rw-r--r-- | src/vnet/tcp/tcp.c | 15 |
1 files changed, 11 insertions, 4 deletions
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index a466e3c935c..ea350dddc69 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -1105,6 +1105,7 @@ tcp_update_time (f64 now, u8 thread_index) tcp_set_time_now (wrk); tw_timer_expire_timers_16t_2w_512sl (&wrk->timer_wheel, now); tcp_do_fastretransmits (wrk); + tcp_send_acks (wrk); tcp_flush_frames_to_output (wrk); } @@ -1143,13 +1144,16 @@ void tcp_connection_tx_pacer_update (tcp_connection_t * tc) { f64 srtt; + u64 rate; if (!transport_connection_is_tx_paced (&tc->connection)) return; srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us); - transport_connection_tx_pacer_update (&tc->connection, - ((f64) tc->cwnd) / srtt); + /* TODO should constrain to interface's max throughput but + * we don't have link speeds for sw ifs ..*/ + rate = tc->cwnd / srtt; + transport_connection_tx_pacer_update (&tc->connection, rate); } void @@ -1326,10 +1330,12 @@ tcp_main_enable (vlib_main_t * vm) vec_validate (tm->wrk_ctx[thread].ongoing_fast_rxt, 255); vec_validate (tm->wrk_ctx[thread].postponed_fast_rxt, 255); vec_validate (tm->wrk_ctx[thread].pending_deq_acked, 255); + vec_validate (tm->wrk_ctx[thread].pending_acks, 255); vec_reset_length (tm->wrk_ctx[thread].pending_fast_rxt); vec_reset_length (tm->wrk_ctx[thread].ongoing_fast_rxt); vec_reset_length (tm->wrk_ctx[thread].postponed_fast_rxt); vec_reset_length (tm->wrk_ctx[thread].pending_deq_acked); + vec_reset_length (tm->wrk_ctx[thread].pending_acks); tm->wrk_ctx[thread].vm = vlib_mains[thread]; /* @@ -1417,6 +1423,7 @@ tcp_init (vlib_main_t * vm) FIB_PROTOCOL_IP6, tcp6_output_node.index); tcp_api_reference (); + tm->tx_pacing = 1; return 0; } @@ -1441,8 +1448,8 @@ tcp_config_fn (vlib_main_t * vm, unformat_input_t * input) else if (unformat (input, "max-rx-fifo %U", unformat_memory_size, &tm->max_rx_fifo)) ; - else if (unformat (input, "tx-pacing")) - tm->tx_pacing = 1; + else if (unformat (input, "no-tx-pacing")) + tm->tx_pacing = 0; else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); |