about summary refs log tree commit diff stats
path: root/src/vnet/tcp
diff options
context:
space:
mode:
author Florin Coras <fcoras@cisco.com> 2020-02-14 23:41:25 +0000
committer Dave Barach <openvpp@barachs.net> 2020-02-18 15:53:06 +0000
commit aa3886993c13d71d93ef01dc73b79985d6ec997f (patch)
tree a112192692c4bdb66b73579d18582800b597aee0 /src/vnet/tcp
parent 4339c36157c0579c60963cea4bafd3ce2521d207 (diff)
tcp: pace timer handling
Type: improvement
Signed-off-by: Florin Coras <fcoras@cisco.com>
Change-Id: I93067054631d6ae2411a7b08d7b681aed7a121b2
Diffstat (limited to 'src/vnet/tcp')
-rw-r--r-- src/vnet/tcp/tcp.c 248
-rw-r--r-- src/vnet/tcp/tcp.h 19
-rw-r--r-- src/vnet/tcp/tcp_output.c 59
3 files changed, 168 insertions, 158 deletions
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index e34f773d7d3..f24ddb3a879 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -1288,103 +1288,23 @@ tcp_session_tx_fifo_offset (transport_connection_t * trans_conn)
}
static void
-tcp_update_time (f64 now, u8 thread_index)
-{
- tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
-
- tcp_set_time_now (wrk);
- tw_timer_expire_timers_16t_2w_512sl (&wrk->timer_wheel, now);
- tcp_flush_frames_to_output (wrk);
-}
-
-static void
-tcp_session_flush_data (transport_connection_t * tconn)
-{
- tcp_connection_t *tc = (tcp_connection_t *) tconn;
- if (tc->flags & TCP_CONN_PSH_PENDING)
- return;
- tc->flags |= TCP_CONN_PSH_PENDING;
- tc->psh_seq = tc->snd_una + transport_max_tx_dequeue (tconn) - 1;
-}
-
-/* *INDENT-OFF* */
-const static transport_proto_vft_t tcp_proto = {
- .enable = vnet_tcp_enable_disable,
- .start_listen = tcp_session_bind,
- .stop_listen = tcp_session_unbind,
- .push_header = tcp_session_push_header,
- .get_connection = tcp_session_get_transport,
- .get_listener = tcp_session_get_listener,
- .get_half_open = tcp_half_open_session_get_transport,
- .connect = tcp_session_open,
- .close = tcp_session_close,
- .cleanup = tcp_session_cleanup,
- .reset = tcp_session_reset,
- .send_mss = tcp_session_send_mss,
- .send_space = tcp_session_send_space,
- .update_time = tcp_update_time,
- .tx_fifo_offset = tcp_session_tx_fifo_offset,
- .flush_data = tcp_session_flush_data,
- .custom_tx = tcp_session_custom_tx,
- .format_connection = format_tcp_session,
- .format_listener = format_tcp_listener_session,
- .format_half_open = format_tcp_half_open_session,
- .transport_options = {
- .tx_type = TRANSPORT_TX_PEEK,
- .service_type = TRANSPORT_SERVICE_VC,
- },
-};
-/* *INDENT-ON* */
-
-void
-tcp_connection_tx_pacer_update (tcp_connection_t * tc)
-{
- if (!transport_connection_is_tx_paced (&tc->connection))
- return;
-
- f64 srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us);
-
- transport_connection_tx_pacer_update (&tc->connection,
- tcp_cc_get_pacing_rate (tc),
- srtt * CLIB_US_TIME_FREQ);
-}
-
-void
-tcp_connection_tx_pacer_reset (tcp_connection_t * tc, u32 window,
- u32 start_bucket)
-{
- f64 srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us);
- transport_connection_tx_pacer_reset (&tc->connection,
- tcp_cc_get_pacing_rate (tc),
- start_bucket,
- srtt * CLIB_US_TIME_FREQ);
-}
-
-static void
-tcp_timer_waitclose_handler (u32 conn_index, u32 thread_index)
+tcp_timer_waitclose_handler (tcp_connection_t * tc)
{
- tcp_connection_t *tc;
-
- tc = tcp_connection_get (conn_index, thread_index);
- if (!tc)
- return;
-
switch (tc->state)
{
case TCP_STATE_CLOSE_WAIT:
tcp_connection_timers_reset (tc);
session_transport_closed_notify (&tc->connection);
-
+ /* App never returned with a close */
if (!(tc->flags & TCP_CONN_FINPNDG))
{
- clib_warning ("close-wait with fin sent");
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+ tcp_worker_stats_inc (tc->c_thread_index, to_closewait, 1);
break;
}
- /* Session didn't come back with a close. Send FIN either way
- * and switch to LAST_ACK. */
+ /* Send FIN either way and switch to LAST_ACK. */
tcp_cong_recovery_off (tc);
/* Make sure we don't try to send unsent data */
tc->snd_nxt = tc->snd_una;
@@ -1393,7 +1313,7 @@ tcp_timer_waitclose_handler (u32 conn_index, u32 thread_index)
/* Make sure we don't wait in LAST ACK forever */
tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time);
- tcp_worker_stats_inc (thread_index, to_closewait, 1);
+ tcp_worker_stats_inc (tc->c_thread_index, to_closewait2, 1);
/* Don't delete the connection yet */
break;
@@ -1415,21 +1335,21 @@ tcp_timer_waitclose_handler (u32 conn_index, u32 thread_index)
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
}
- tcp_worker_stats_inc (thread_index, to_finwait1, 1);
+ tcp_worker_stats_inc (tc->c_thread_index, to_finwait1, 1);
break;
case TCP_STATE_LAST_ACK:
tcp_connection_timers_reset (tc);
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
session_transport_closed_notify (&tc->connection);
- tcp_worker_stats_inc (thread_index, to_lastack, 1);
+ tcp_worker_stats_inc (tc->c_thread_index, to_lastack, 1);
break;
case TCP_STATE_CLOSING:
tcp_connection_timers_reset (tc);
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
session_transport_closed_notify (&tc->connection);
- tcp_worker_stats_inc (thread_index, to_closing, 1);
+ tcp_worker_stats_inc (tc->c_thread_index, to_closing, 1);
break;
case TCP_STATE_FIN_WAIT_2:
tcp_send_reset (tc);
@@ -1437,7 +1357,7 @@ tcp_timer_waitclose_handler (u32 conn_index, u32 thread_index)
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
session_transport_closed_notify (&tc->connection);
tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
- tcp_worker_stats_inc (thread_index, to_finwait2, 1);
+ tcp_worker_stats_inc (tc->c_thread_index, to_finwait2, 1);
break;
default:
tcp_connection_del (tc);
@@ -1457,15 +1377,144 @@ static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] =
/* *INDENT-ON* */
static void
+tcp_dispatch_pending_timers (tcp_worker_ctx_t * wrk)
+{
+ u32 n_timers, connection_index, timer_id, thread_index, timer_handle;
+ tcp_connection_t *tc;
+ int i;
+
+ if (!(n_timers = clib_fifo_elts (wrk->pending_timers)))
+ return;
+
+ thread_index = wrk->vm->thread_index;
+ for (i = 0; i < clib_min (n_timers, 32); i++)
+ {
+ clib_fifo_sub1 (wrk->pending_timers, timer_handle);
+ connection_index = timer_handle & 0x0FFFFFFF;
+ timer_id = timer_handle >> 28;
+
+ if (PREDICT_TRUE (timer_id != TCP_TIMER_RETRANSMIT_SYN))
+ tc = tcp_connection_get (connection_index, thread_index);
+ else
+ tc = tcp_half_open_connection_get (connection_index);
+
+ if (PREDICT_FALSE (!tc))
+ continue;
+
+ /* Skip timer if it was rearmed while pending dispatch */
+ if (PREDICT_FALSE (tc->timers[timer_id] != TCP_TIMER_HANDLE_INVALID))
+ continue;
+
+ (*timer_expiration_handlers[timer_id]) (tc);
+ }
+}
+
+/**
+ * Flush ip lookup tx frames populated by timer pops
+ */
+static void
+tcp_flush_frames_to_output (tcp_worker_ctx_t * wrk)
+{
+ if (wrk->ip_lookup_tx_frames[0])
+ {
+ vlib_put_frame_to_node (wrk->vm, ip4_lookup_node.index,
+ wrk->ip_lookup_tx_frames[0]);
+ wrk->ip_lookup_tx_frames[0] = 0;
+ }
+ if (wrk->ip_lookup_tx_frames[1])
+ {
+ vlib_put_frame_to_node (wrk->vm, ip6_lookup_node.index,
+ wrk->ip_lookup_tx_frames[1]);
+ wrk->ip_lookup_tx_frames[1] = 0;
+ }
+}
+
+static void
+tcp_update_time (f64 now, u8 thread_index)
+{
+ tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
+
+ tcp_set_time_now (wrk);
+ tw_timer_expire_timers_16t_2w_512sl (&wrk->timer_wheel, now);
+ tcp_dispatch_pending_timers (wrk);
+ tcp_flush_frames_to_output (wrk);
+}
+
+static void
+tcp_session_flush_data (transport_connection_t * tconn)
+{
+ tcp_connection_t *tc = (tcp_connection_t *) tconn;
+ if (tc->flags & TCP_CONN_PSH_PENDING)
+ return;
+ tc->flags |= TCP_CONN_PSH_PENDING;
+ tc->psh_seq = tc->snd_una + transport_max_tx_dequeue (tconn) - 1;
+}
+
+/* *INDENT-OFF* */
+const static transport_proto_vft_t tcp_proto = {
+ .enable = vnet_tcp_enable_disable,
+ .start_listen = tcp_session_bind,
+ .stop_listen = tcp_session_unbind,
+ .push_header = tcp_session_push_header,
+ .get_connection = tcp_session_get_transport,
+ .get_listener = tcp_session_get_listener,
+ .get_half_open = tcp_half_open_session_get_transport,
+ .connect = tcp_session_open,
+ .close = tcp_session_close,
+ .cleanup = tcp_session_cleanup,
+ .reset = tcp_session_reset,
+ .send_mss = tcp_session_send_mss,
+ .send_space = tcp_session_send_space,
+ .update_time = tcp_update_time,
+ .tx_fifo_offset = tcp_session_tx_fifo_offset,
+ .flush_data = tcp_session_flush_data,
+ .custom_tx = tcp_session_custom_tx,
+ .format_connection = format_tcp_session,
+ .format_listener = format_tcp_listener_session,
+ .format_half_open = format_tcp_half_open_session,
+ .transport_options = {
+ .tx_type = TRANSPORT_TX_PEEK,
+ .service_type = TRANSPORT_SERVICE_VC,
+ },
+};
+/* *INDENT-ON* */
+
+void
+tcp_connection_tx_pacer_update (tcp_connection_t * tc)
+{
+ if (!transport_connection_is_tx_paced (&tc->connection))
+ return;
+
+ f64 srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us);
+
+ transport_connection_tx_pacer_update (&tc->connection,
+ tcp_cc_get_pacing_rate (tc),
+ srtt * CLIB_US_TIME_FREQ);
+}
+
+void
+tcp_connection_tx_pacer_reset (tcp_connection_t * tc, u32 window,
+ u32 start_bucket)
+{
+ f64 srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us);
+ transport_connection_tx_pacer_reset (&tc->connection,
+ tcp_cc_get_pacing_rate (tc),
+ start_bucket,
+ srtt * CLIB_US_TIME_FREQ);
+}
+
+static void
tcp_expired_timers_dispatch (u32 * expired_timers)
{
u32 thread_index = vlib_get_thread_index ();
u32 connection_index, timer_id, n_expired;
+ tcp_worker_ctx_t *wrk;
tcp_connection_t *tc;
int i;
+ wrk = tcp_get_worker (thread_index);
n_expired = vec_len (expired_timers);
- tcp_worker_stats_inc (thread_index, timer_expirations, n_expired);
+ tcp_workerp_stats_inc (wrk, timer_expirations, n_expired);
/*
* Invalidate all timer handles before dispatching. This avoids dangling
@@ -1486,15 +1535,7 @@ tcp_expired_timers_dispatch (u32 * expired_timers)
tc->timers[timer_id] = TCP_TIMER_HANDLE_INVALID;
}
- /*
- * Dispatch expired timers
- */
- for (i = 0; i < n_expired; i++)
- {
- connection_index = expired_timers[i] & 0x0FFFFFFF;
- timer_id = expired_timers[i] >> 28;
- (*timer_expiration_handlers[timer_id]) (connection_index, thread_index);
- }
+ clib_fifo_add (wrk->pending_timers, expired_timers, n_expired);
}
static void
@@ -2297,14 +2338,19 @@ show_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input,
for (thread = 0; thread < vec_len (tm->wrk_ctx); thread++)
{
wrk = tcp_get_worker (thread);
- vlib_cli_output (vm, "Thread %d:\n", thread);
+ vlib_cli_output (vm, "Thread %u:\n", thread);
+
+ if (clib_fifo_elts (wrk->pending_timers))
+ vlib_cli_output (vm, " %lu pending timers",
+ clib_fifo_elts (wrk->pending_timers));
#define _(name,type,str) \
if (wrk->stats.name) \
- vlib_cli_output (vm, " %ld %s", wrk->stats.name, str);
+ vlib_cli_output (vm, " %lu %s", wrk->stats.name, str);
foreach_tcp_wrk_stat
#undef _
}
+
return 0;
}
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index a60e105f0d2..e7b53b8ec42 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -82,13 +82,6 @@ typedef enum _tcp_timers
TCP_N_TIMERS
} tcp_timers_e;
-typedef void (timer_expiration_handler) (u32 index, u32 thread_index);
-
-extern timer_expiration_handler tcp_timer_delack_handler;
-extern timer_expiration_handler tcp_timer_retransmit_handler;
-extern timer_expiration_handler tcp_timer_persist_handler;
-extern timer_expiration_handler tcp_timer_retransmit_syn_handler;
-
#define TCP_TIMER_HANDLE_INVALID ((u32) ~0)
#define TCP_TIMER_TICK 0.1 /**< Timer tick in seconds */
@@ -478,6 +471,13 @@ struct _tcp_cc_algorithm
#define tcp_csum_offload(tc) (!((tc)->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD))
+typedef void (timer_expiration_handler) (tcp_connection_t * tc);
+
+extern timer_expiration_handler tcp_timer_delack_handler;
+extern timer_expiration_handler tcp_timer_retransmit_handler;
+extern timer_expiration_handler tcp_timer_persist_handler;
+extern timer_expiration_handler tcp_timer_retransmit_syn_handler;
+
always_inline void
tcp_cong_recovery_off (tcp_connection_t * tc)
{
@@ -507,6 +507,7 @@ typedef struct _tcp_lookup_dispatch
_(rxt_segs, u64, "segments retransmitted") \
_(tr_events, u32, "timer retransmit events") \
_(to_closewait, u32, "timeout close-wait") \
+ _(to_closewait2, u32, "timeout close-wait w/data") \
_(to_finwait1, u32, "timeout fin-wait-1") \
_(to_finwait2, u32, "timeout fin-wait-2") \
_(to_lastack, u32, "timeout last-ack") \
@@ -554,6 +555,9 @@ typedef struct tcp_worker_ctx_
/** tx buffer free list */
u32 *tx_buffers;
+ /* Fifo of pending timer expirations */
+ u32 *pending_timers;
+
/** worker timer wheel */
tw_timer_wheel_16t_2w_512sl_t timer_wheel;
@@ -831,7 +835,6 @@ void tcp_send_fin (tcp_connection_t * tc);
void tcp_send_ack (tcp_connection_t * tc);
void tcp_update_burst_snd_vars (tcp_connection_t * tc);
void tcp_update_rto (tcp_connection_t * tc);
-void tcp_flush_frames_to_output (tcp_worker_ctx_t * wrk);
void tcp_send_window_update_ack (tcp_connection_t * tc);
void tcp_program_ack (tcp_connection_t * tc);
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 69b34cc595f..5eccda60ad2 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -973,32 +973,6 @@ tcp_send_synack (tcp_connection_t * tc)
}
/**
- * Flush ip lookup tx frames populated by timer pops
- */
-static void
-tcp_flush_frame_to_ip_lookup (tcp_worker_ctx_t * wrk, u8 is_ip4)
-{
- if (wrk->ip_lookup_tx_frames[!is_ip4])
- {
- u32 next_index;
- next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
- vlib_put_frame_to_node (wrk->vm, next_index,
- wrk->ip_lookup_tx_frames[!is_ip4]);
- wrk->ip_lookup_tx_frames[!is_ip4] = 0;
- }
-}
-
-/**
- * Flush v4 and v6 tcp and ip-lookup tx frames for thread index
- */
-void
-tcp_flush_frames_to_output (tcp_worker_ctx_t * wrk)
-{
- tcp_flush_frame_to_ip_lookup (wrk, 1);
- tcp_flush_frame_to_ip_lookup (wrk, 0);
-}
-
-/**
* Send FIN
*/
void
@@ -1208,11 +1182,8 @@ tcp_program_retransmit (tcp_connection_t * tc)
* Sends delayed ACK when timer expires
*/
void
-tcp_timer_delack_handler (u32 index, u32 thread_index)
+tcp_timer_delack_handler (tcp_connection_t * tc)
{
- tcp_connection_t *tc;
-
- tc = tcp_connection_get (index, thread_index);
tcp_send_ack (tc);
}
@@ -1443,19 +1414,17 @@ tcp_cc_init_rxt_timeout (tcp_connection_t * tc)
}
void
-tcp_timer_retransmit_handler (u32 tc_index, u32 thread_index)
+tcp_timer_retransmit_handler (tcp_connection_t * tc)
{
- tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
+ tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
vlib_main_t *vm = wrk->vm;
- tcp_connection_t *tc;
vlib_buffer_t *b = 0;
u32 bi, n_bytes;
tcp_workerp_stats_inc (wrk, tr_events, 1);
- tc = tcp_connection_get (tc_index, thread_index);
- /* Note: the connection may have been closed and pool_put */
- if (PREDICT_FALSE (tc == 0 || tc->state == TCP_STATE_SYN_SENT))
+ /* Should be handled by a different handler */
+ if (PREDICT_FALSE (tc->state == TCP_STATE_SYN_SENT))
return;
/* Wait-close and retransmit could pop at the same time */
@@ -1592,18 +1561,15 @@ tcp_timer_retransmit_handler (u32 tc_index, u32 thread_index)
* SYN retransmit timer handler. Active open only.
*/
void
-tcp_timer_retransmit_syn_handler (u32 tc_index, u32 thread_index)
+tcp_timer_retransmit_syn_handler (tcp_connection_t * tc)
{
- tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
+ tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
vlib_main_t *vm = wrk->vm;
- tcp_connection_t *tc;
vlib_buffer_t *b = 0;
u32 bi;
- tc = tcp_half_open_connection_get (tc_index);
-
/* Note: the connection may have transitioned to ESTABLISHED... */
- if (PREDICT_FALSE (tc == 0 || tc->state != TCP_STATE_SYN_SENT))
+ if (PREDICT_FALSE (tc->state != TCP_STATE_SYN_SENT))
return;
/* Half-open connection actually moved to established but we were
@@ -1658,21 +1624,16 @@ tcp_timer_retransmit_syn_handler (u32 tc_index, u32 thread_index)
*
*/
void
-tcp_timer_persist_handler (u32 index, u32 thread_index)
+tcp_timer_persist_handler (tcp_connection_t * tc)
{
- tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
+ tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
u32 bi, max_snd_bytes, available_bytes, offset;
tcp_main_t *tm = vnet_get_tcp_main ();
vlib_main_t *vm = wrk->vm;
- tcp_connection_t *tc;
vlib_buffer_t *b;
int n_bytes = 0;
u8 *data;
- tc = tcp_connection_get_if_valid (index, thread_index);
- if (!tc)
- return;
-
/* Problem already solved or worse */
if (tc->state == TCP_STATE_CLOSED || tc->snd_wnd > tc->snd_mss
|| (tc->flags & TCP_CONN_FINSNT))