summaryrefslogtreecommitdiffstats
path: root/src/vnet/tcp/tcp_output.c
diff options
context:
space:
mode:
authorFlorin Coras <fcoras@cisco.com>2018-11-05 15:57:21 -0800
committerDamjan Marion <dmarion@me.com>2018-11-07 13:26:12 +0000
commit7ac053b27fee8f9e437cf7b61357943356381061 (patch)
treef0a844206701bef79107626312e5e31e383423c1 /src/vnet/tcp/tcp_output.c
parentf5942d5612d99c5ea1189cb9f8de6b6097b0456e (diff)
tcp: consume incoming buffers instead of reusing
Instead of reusing buffers for acking, consume all buffers and program output for (dup)ack generation. This implicitly fixes the drop counters that were artificially inflated by both data and feedback traffic. Moreover, the patch also significantly reduces the ack traffic as we now only generate an ack per frame, unless duplicate acks need to be sent. Because of the reduced feedback traffic, a sender's rx path and a receiver's tx path are now significantly less loaded. In particular, a sender can overwhelm a 40Gbps NIC and generate tx drop bursts for low rtts. Consequently, tx pacing is now enforced by default. Change-Id: I619c29a8945bf26c093f8f9e197e3c6d5d43868e Signed-off-by: Florin Coras <fcoras@cisco.com>
Diffstat (limited to 'src/vnet/tcp/tcp_output.c')
-rw-r--r--src/vnet/tcp/tcp_output.c95
1 files changed, 67 insertions, 28 deletions
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 29a919bd160..089f85a0ea0 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -559,7 +559,6 @@ tcp_make_ack (tcp_connection_t * tc, vlib_buffer_t * b)
tcp_reuse_buffer (vm, b);
tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK);
TCP_EVT_DBG (TCP_EVT_ACK_SENT, tc);
- vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK;
tc->rcv_las = tc->rcv_nxt;
}
@@ -631,7 +630,6 @@ tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b)
tcp_options_write ((u8 *) (th + 1), snd_opts);
vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
- vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK;
/* Init retransmit timer. Use update instead of set because of
* retransmissions */
@@ -1011,6 +1009,23 @@ tcp_send_syn (tcp_connection_t * tc)
TCP_EVT_DBG (TCP_EVT_SYN_SENT, tc);
}
+void
+tcp_send_synack (tcp_connection_t * tc)
+{
+ tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
+ vlib_main_t *vm = wrk->vm;
+ vlib_buffer_t *b;
+ u32 bi;
+
+ /* Get buffer */
+ if (PREDICT_FALSE (tcp_get_free_buffer_index (wrk, &bi)))
+ return;
+
+ b = vlib_get_buffer (vm, bi);
+ tcp_make_synack (tc, b);
+ tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
+}
+
/**
* Flush tx frame populated by retransmits and timer pops
*/
@@ -1223,6 +1238,56 @@ tcp_send_ack (tcp_connection_t * tc)
tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
}
+void
+tcp_program_ack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
+{
+ if (!(tc->flags & TCP_CONN_SNDACK))
+ {
+ vec_add1 (wrk->pending_acks, tc->c_c_index);
+ tc->flags |= TCP_CONN_SNDACK;
+ }
+}
+
+void
+tcp_program_dupack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
+{
+ if (!(tc->flags & TCP_CONN_SNDACK))
+ {
+ vec_add1 (wrk->pending_acks, tc->c_c_index);
+ tc->flags |= TCP_CONN_SNDACK;
+ }
+ if (tc->pending_dupacks < 255)
+ tc->pending_dupacks += 1;
+}
+
+void
+tcp_send_acks (tcp_worker_ctx_t * wrk)
+{
+ u32 thread_index, *pending_acks;
+ tcp_connection_t *tc;
+ int i, j, n_acks;
+
+ if (!vec_len (wrk->pending_acks))
+ return;
+
+ thread_index = wrk->vm->thread_index;
+ pending_acks = wrk->pending_acks;
+ for (i = 0; i < vec_len (pending_acks); i++)
+ {
+ tc = tcp_connection_get (pending_acks[i], thread_index);
+ tc->flags &= ~TCP_CONN_SNDACK;
+ n_acks = clib_max (1, tc->pending_dupacks);
+ /* If we're supposed to send dupacks but have no ooo data
+ * send only one ack */
+ if (tc->pending_dupacks && !vec_len (tc->snd_sacks))
+ n_acks = 1;
+ for (j = 0; j < n_acks; j++)
+ tcp_send_ack (tc);
+ tc->pending_dupacks = 0;
+ }
+ _vec_len (wrk->pending_acks) = 0;
+}
+
/**
* Delayed ack timer handler
*
@@ -1944,13 +2009,6 @@ tcp_fast_retransmit (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
return tcp_fast_retransmit_no_sack (wrk, tc, burst_size);
}
-static u32
-tcp_session_has_ooo_data (tcp_connection_t * tc)
-{
- stream_session_t *s = session_get (tc->c_s_index, tc->c_thread_index);
- return svm_fifo_has_ooo_data (s->server_rx_fifo);
-}
-
static void
tcp_output_handle_link_local (tcp_connection_t * tc0, vlib_buffer_t * b0,
u16 * next0, u32 * error0)
@@ -2055,25 +2113,6 @@ tcp_output_handle_packet (tcp_connection_t * tc0, vlib_buffer_t * b0,
tcp_output_handle_link_local (tc0, b0, next0, error0);
}
- /* Filter out DUPACKs if there are no OOO segments left */
- if (PREDICT_FALSE (vnet_buffer (b0)->tcp.flags & TCP_BUF_FLAG_DUPACK))
- {
- /* N.B. Should not filter burst of dupacks. Two issues:
- * 1) dupacks open cwnd on remote peer when congested
- * 2) acks leaving should have the latest rcv_wnd since the
- * burst may have eaten up all of it, so only the old ones
- * could be filtered.
- */
- if (!tcp_session_has_ooo_data (tc0))
- {
- *error0 = TCP_ERROR_FILTERED_DUPACKS;
- *next0 = TCP_OUTPUT_NEXT_DROP;
- return;
- }
- }
-
- /* Stop DELACK timer and fix flags */
- tc0->flags &= ~(TCP_CONN_SNDACK);
if (!TCP_ALWAYS_ACK)
tcp_timer_reset (tc0, TCP_TIMER_DELACK);
}