| author | Florin Coras <fcoras@cisco.com> | 2018-11-05 15:57:21 -0800 |
|---|---|---|
| committer | Damjan Marion <dmarion@me.com> | 2018-11-07 13:26:12 +0000 |
| commit | 7ac053b27fee8f9e437cf7b61357943356381061 (patch) | |
| tree | f0a844206701bef79107626312e5e31e383423c1 /src/vnet/tcp/tcp_output.c | |
| parent | f5942d5612d99c5ea1189cb9f8de6b6097b0456e (diff) | |
tcp: consume incoming buffers instead of reusing
Instead of reusing buffers for acking, consume all buffers and program
output for (dup)ack generation. This implicitly fixes the drop counters
that were artificially inflated by both data and feedback traffic.
The patch also significantly reduces ack traffic: we now generate only one
ack per frame, unless duplicate acks need to be sent.
Because of the reduced feedback traffic, a sender's rx path and a
receiver's tx path are now significantly less loaded. In particular, a
sender can now overwhelm a 40Gbps NIC and generate bursts of tx drops at
low RTTs. Consequently, tx pacing is now enforced by default.
Change-Id: I619c29a8945bf26c093f8f9e197e3c6d5d43868e
Signed-off-by: Florin Coras <fcoras@cisco.com>
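
The caller side of the new batching API lives outside this file, so it does not appear in the diff below. As a rough sketch of how the pieces fit together (the node function and the `segment_in_order` predicate here are hypothetical; the `tcp_*` calls are the ones introduced by this patch), the rx path marks each connection that owes feedback and a single end-of-frame pass emits the acks:

```c
/* Illustrative sketch only: shows how an input node might drive the
 * batching API added below. The function name and the
 * segment_in_order() predicate are hypothetical; the tcp_* calls are
 * the ones introduced by this patch. */
static void
tcp_input_frame_sketch (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
{
  tcp_worker_ctx_t *wrk = tcp_get_worker (vm->thread_index);
  u32 i;

  for (i = 0; i < n_buffers; i++)
    {
      vlib_buffer_t *b = vlib_get_buffer (vm, buffers[i]);
      tcp_connection_t *tc =
	tcp_connection_get (vnet_buffer (b)->tcp.connection_index,
			    vm->thread_index);

      /* ... process and consume the segment; the buffer is never
       * reused for the reply ... */

      if (segment_in_order (tc, b))
	tcp_program_ack (wrk, tc);	/* at most one queue entry per tc */
      else
	tcp_program_dupack (wrk, tc);	/* also counts dupacks, capped at 255 */
    }

  /* Once per frame: one ack per connection, or a dupack burst when
   * SACK blocks show outstanding ooo data. */
  tcp_send_acks (wrk);
}
```

This per-frame flush is what reduces feedback traffic to roughly one ack per connection per frame.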
Diffstat (limited to 'src/vnet/tcp/tcp_output.c')
| -rw-r--r-- | src/vnet/tcp/tcp_output.c | 95 |

1 file changed, 67 insertions(+), 28 deletions(-)
```diff
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 29a919bd160..089f85a0ea0 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -559,7 +559,6 @@ tcp_make_ack (tcp_connection_t * tc, vlib_buffer_t * b)
   tcp_reuse_buffer (vm, b);
   tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK);
   TCP_EVT_DBG (TCP_EVT_ACK_SENT, tc);
-  vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK;
   tc->rcv_las = tc->rcv_nxt;
 }
 
@@ -631,7 +630,6 @@ tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b)
   tcp_options_write ((u8 *) (th + 1), snd_opts);
   vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
-  vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK;
 
   /* Init retransmit timer. Use update instead of set because of
    * retransmissions */
@@ -1011,6 +1009,23 @@ tcp_send_syn (tcp_connection_t * tc)
   TCP_EVT_DBG (TCP_EVT_SYN_SENT, tc);
 }
 
+void
+tcp_send_synack (tcp_connection_t * tc)
+{
+  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
+  vlib_main_t *vm = wrk->vm;
+  vlib_buffer_t *b;
+  u32 bi;
+
+  /* Get buffer */
+  if (PREDICT_FALSE (tcp_get_free_buffer_index (wrk, &bi)))
+    return;
+
+  b = vlib_get_buffer (vm, bi);
+  tcp_make_synack (tc, b);
+  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
+}
+
 /**
  * Flush tx frame populated by retransmits and timer pops
  */
@@ -1223,6 +1238,56 @@ tcp_send_ack (tcp_connection_t * tc)
   tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
 }
 
+void
+tcp_program_ack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
+{
+  if (!(tc->flags & TCP_CONN_SNDACK))
+    {
+      vec_add1 (wrk->pending_acks, tc->c_c_index);
+      tc->flags |= TCP_CONN_SNDACK;
+    }
+}
+
+void
+tcp_program_dupack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
+{
+  if (!(tc->flags & TCP_CONN_SNDACK))
+    {
+      vec_add1 (wrk->pending_acks, tc->c_c_index);
+      tc->flags |= TCP_CONN_SNDACK;
+    }
+  if (tc->pending_dupacks < 255)
+    tc->pending_dupacks += 1;
+}
+
+void
+tcp_send_acks (tcp_worker_ctx_t * wrk)
+{
+  u32 thread_index, *pending_acks;
+  tcp_connection_t *tc;
+  int i, j, n_acks;
+
+  if (!vec_len (wrk->pending_acks))
+    return;
+
+  thread_index = wrk->vm->thread_index;
+  pending_acks = wrk->pending_acks;
+  for (i = 0; i < vec_len (pending_acks); i++)
+    {
+      tc = tcp_connection_get (pending_acks[i], thread_index);
+      tc->flags &= ~TCP_CONN_SNDACK;
+      n_acks = clib_max (1, tc->pending_dupacks);
+      /* If we're supposed to send dupacks but have no ooo data
+       * send only one ack */
+      if (tc->pending_dupacks && !vec_len (tc->snd_sacks))
+        n_acks = 1;
+      for (j = 0; j < n_acks; j++)
+        tcp_send_ack (tc);
+      tc->pending_dupacks = 0;
+    }
+  _vec_len (wrk->pending_acks) = 0;
+}
+
 /**
  * Delayed ack timer handler
  *
@@ -1944,13 +2009,6 @@ tcp_fast_retransmit (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
   return tcp_fast_retransmit_no_sack (wrk, tc, burst_size);
 }
 
-static u32
-tcp_session_has_ooo_data (tcp_connection_t * tc)
-{
-  stream_session_t *s = session_get (tc->c_s_index, tc->c_thread_index);
-  return svm_fifo_has_ooo_data (s->server_rx_fifo);
-}
-
 static void
 tcp_output_handle_link_local (tcp_connection_t * tc0, vlib_buffer_t * b0,
                               u16 * next0, u32 * error0)
@@ -2055,25 +2113,6 @@ tcp_output_handle_packet (tcp_connection_t * tc0, vlib_buffer_t * b0,
       tcp_output_handle_link_local (tc0, b0, next0, error0);
     }
 
-  /* Filter out DUPACKs if there are no OOO segments left */
-  if (PREDICT_FALSE (vnet_buffer (b0)->tcp.flags & TCP_BUF_FLAG_DUPACK))
-    {
-      /* N.B. Should not filter burst of dupacks. Two issues:
-       * 1) dupacks open cwnd on remote peer when congested
-       * 2) acks leaving should have the latest rcv_wnd since the
-       *    burst may have eaten up all of it, so only the old ones
-       *    could be filtered.
-       */
-      if (!tcp_session_has_ooo_data (tc0))
-        {
-          *error0 = TCP_ERROR_FILTERED_DUPACKS;
-          *next0 = TCP_OUTPUT_NEXT_DROP;
-          return;
-        }
-    }
-
-  /* Stop DELACK timer and fix flags */
-  tc0->flags &= ~(TCP_CONN_SNDACK);
   if (!TCP_ALWAYS_ACK)
     tcp_timer_reset (tc0, TCP_TIMER_DELACK);
 }
```
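
To make the coalescing behaviour concrete, here is a small standalone mock (all types and names are local to this example; the vppinfra vectors are replaced by a fixed array) of the bookkeeping done by tcp_program_ack(), tcp_program_dupack() and tcp_send_acks(): repeated in-order segments collapse into a single queued ack, while dupacks accumulate a bounded count that is replayed in full only when SACK blocks indicate outstanding ooo data.

```c
#include <stdio.h>

/* Standalone mock of the ack-batching logic in this patch; the real
 * code operates on tcp_connection_t and vppinfra vectors. */
#define CONN_SNDACK (1 << 0)

typedef struct
{
  unsigned flags;
  unsigned char pending_dupacks;	/* saturates at 255, as in the patch */
  int has_sacks;			/* stands in for vec_len (tc->snd_sacks) */
} conn_t;

static conn_t *pending[64];
static int n_pending;

static void
program_ack (conn_t * c)
{
  if (!(c->flags & CONN_SNDACK))	/* at most one queue entry per conn */
    {
      pending[n_pending++] = c;
      c->flags |= CONN_SNDACK;
    }
}

static void
program_dupack (conn_t * c)
{
  program_ack (c);
  if (c->pending_dupacks < 255)
    c->pending_dupacks++;
}

static void
send_acks (void)
{
  for (int i = 0; i < n_pending; i++)
    {
      conn_t *c = pending[i];
      c->flags &= ~CONN_SNDACK;
      /* dupacks are replayed only while SACK blocks show ooo data */
      int n_acks =
	c->pending_dupacks ? (c->has_sacks ? c->pending_dupacks : 1) : 1;
      printf ("conn %d: %d ack(s)\n", i, n_acks);	/* i = queue position */
      c->pending_dupacks = 0;
    }
  n_pending = 0;
}

int
main (void)
{
  conn_t a = { 0 }, b = { 0, 0, 1 };
  for (int i = 0; i < 8; i++)	/* 8 in-order segments in one frame */
    program_ack (&a);
  for (int i = 0; i < 3; i++)	/* 3 ooo segments, SACKs outstanding */
    program_dupack (&b);
  send_acks ();
  return 0;
}
```

Compiled and run, this prints one ack for the first connection and three for the second, mirroring the n_acks computation in tcp_send_acks(): eight data packets in a frame cost a single ack, while genuine duplicate acks still go out as a burst.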