summary | refs | log | tree | commit | diff | stats
path: root/src/vnet/tcp
diff options
context:
space:
mode:
author: Florin Coras <fcoras@cisco.com> 2017-11-27 04:34:14 -0500
committer: Dave Barach <openvpp@barachs.net> 2017-11-28 13:21:26 +0000
commit: f988e696149f42828444c69762c036d9684b6bb0 (patch)
tree: 8e62fbd53a713e64be7c0441f0399aa023a949d8 /src/vnet/tcp
parent: af6f93a4eb4740ac13b1d65eb44da442976fa809 (diff)
tcp: fix retransmissions under buffer shortage
- add debugging scaffolding for simulating buffer shortage

Change-Id: Ice519d74f9c4e4094c4586c548185135b7bb5f2d
Signed-off-by: Florin Coras <fcoras@cisco.com>
Diffstat (limited to 'src/vnet/tcp')
-rw-r--r-- src/vnet/tcp/tcp.c | 7
-rw-r--r-- src/vnet/tcp/tcp.h | 12
-rwxr-xr-x src/vnet/tcp/tcp_debug.h | 34
-rw-r--r-- src/vnet/tcp/tcp_input.c | 5
-rw-r--r-- src/vnet/tcp/tcp_output.c | 56
5 files changed, 82 insertions, 32 deletions
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index a0a5f190fa4..b16b2a7dfb2 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -233,6 +233,8 @@ tcp_connection_reset (tcp_connection_t * tc)
tcp_connection_cleanup (tc);
break;
case TCP_STATE_ESTABLISHED:
+ stream_session_reset_notify (&tc->connection);
+ /* fall through */
case TCP_STATE_CLOSE_WAIT:
case TCP_STATE_FIN_WAIT_1:
case TCP_STATE_FIN_WAIT_2:
@@ -242,7 +244,6 @@ tcp_connection_reset (tcp_connection_t * tc)
/* Make sure all timers are cleared */
tcp_connection_timers_reset (tc);
- stream_session_reset_notify (&tc->connection);
/* Wait for cleanup from session layer but not forever */
tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
@@ -1319,7 +1320,9 @@ tcp_config_fn (vlib_main_t * vm, unformat_input_t * input)
&tm->local_endpoints_table_buckets))
;
-
+ else if (unformat (input, "buffer-fail-fraction %f",
+ &tm->buffer_fail_fraction))
+ ;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index b057b883fd8..1ddfac0ce96 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -410,6 +410,9 @@ typedef struct _tcp_main
u8 punt_unknown4;
u8 punt_unknown6;
+
+ /** fault-injection */
+ f64 buffer_fail_fraction;
} tcp_main_t;
extern tcp_main_t tcp_main;
@@ -432,6 +435,15 @@ tcp_buffer_hdr (vlib_buffer_t * b)
+ vnet_buffer (b)->tcp.hdr_offset);
}
+#if (VLIB_BUFFER_TRACE_TRAJECTORY)
+#define tcp_trajectory_add_start(b, start) \
+{ \
+ (*vlib_buffer_trace_trajectory_cb) (b, start); \
+}
+#else
+#define tcp_trajectory_add_start(b, start)
+#endif
+
clib_error_t *vnet_tcp_enable_disable (vlib_main_t * vm, u8 is_en);
void tcp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h
index eb318cde4b9..5d4f7d6879d 100755
--- a/src/vnet/tcp/tcp_debug.h
+++ b/src/vnet/tcp/tcp_debug.h
@@ -22,6 +22,7 @@
#define TCP_DEBUG_SM (0)
#define TCP_DEBUG_CC (0)
#define TCP_DEBUG_CC_STAT (1)
+#define TCP_DEBUG_BUFFER_ALLOCATION (0)
#define foreach_tcp_dbg_evt \
_(INIT, "") \
@@ -747,6 +748,39 @@ if (_tc->c_cc_stat_tstamp + STATS_INTERVAL < tcp_time_now()) \
} \
}
+/*
+ * Buffer allocation
+ */
+#if TCP_DEBUG_BUFFER_ALLOCATION
+
+#define TCP_DBG_BUFFER_ALLOC_MAYBE_FAIL(thread_index) \
+{ \
+ static u32 *buffer_fail_counters; \
+ if (PREDICT_FALSE (buffer_fail_counters == 0)) \
+ { \
+ u32 num_threads; \
+ vlib_thread_main_t *vtm = vlib_get_thread_main (); \
+ num_threads = 1 /* main thread */ + vtm->n_threads; \
+ vec_validate (buffer_fail_counters, num_threads - 1); \
+ } \
+ if (PREDICT_FALSE (tcp_main.buffer_fail_fraction != 0.0)) \
+ { \
+ if (PREDICT_TRUE (buffer_fail_counters[thread_index] > 0)) \
+ { \
+ if ((1.0 / (f32) (buffer_fail_counters[thread_index])) \
+ < tcp_main.buffer_fail_fraction) \
+ { \
+ buffer_fail_counters[thread_index] = 0.0000001; \
+ return -1; \
+ } \
+ } \
+ buffer_fail_counters[thread_index] ++; \
+ } \
+}
+#else
+#define TCP_DBG_BUFFER_ALLOC_MAYBE_FAIL(thread_index)
+#endif
+
#else
#define TCP_EVT_CC_STAT_HANDLER(_tc, ...)
#endif
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index 614b94a4b06..702a94f3a98 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -3001,6 +3001,9 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
goto done;
}
+ vnet_buffer (b0)->tcp.hdr_offset = (u8 *) tcp0
+ - (u8 *) vlib_buffer_get_current (b0);
+
/* Session exists */
if (PREDICT_TRUE (0 != tconn))
{
@@ -3014,8 +3017,6 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_buffer (b0)->tcp.ack_number =
clib_net_to_host_u32 (tcp0->ack_number);
- vnet_buffer (b0)->tcp.hdr_offset = (u8 *) tcp0
- - (u8 *) vlib_buffer_get_current (b0);
vnet_buffer (b0)->tcp.data_offset = n_advance_bytes0;
vnet_buffer (b0)->tcp.data_len = n_data_bytes0;
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 3509ad4701d..f377c912073 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -463,6 +463,9 @@ tcp_get_free_buffer_index (tcp_main_t * tm, u32 * bidx)
{
u32 *my_tx_buffers;
u32 thread_index = vlib_get_thread_index ();
+
+ TCP_DBG_BUFFER_ALLOC_MAYBE_FAIL (thread_index);
+
if (PREDICT_FALSE (vec_len (tm->tx_buffers[thread_index]) == 0))
{
if (tcp_alloc_tx_buffers (tm, thread_index, VLIB_FRAME_SIZE))
@@ -504,7 +507,7 @@ tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b)
b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
b->total_length_not_including_first_buffer = 0;
vnet_buffer (b)->tcp.flags = 0;
-
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
/* Leave enough space for headers */
return vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
}
@@ -590,9 +593,6 @@ tcp_make_syn (tcp_connection_t * tc, vlib_buffer_t * b)
initial_wnd);
vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
tcp_options_write ((u8 *) (th + 1), &snd_opts);
-
- tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN,
- tc->rto * TCP_TO_TIMER_TICK);
}
/**
@@ -641,15 +641,11 @@ tcp_enqueue_to_ip_lookup_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
b->error = 0;
/* Default FIB for now */
- vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
/* Send to IP lookup */
next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
- if (VLIB_BUFFER_TRACE_TRAJECTORY > 0)
- {
- b->pre_data[0] = 2;
- b->pre_data[1] = next_index;
- }
+ tcp_trajectory_add_start (b, 1);
f = tm->ip_lookup_tx_frames[!is_ip4][thread_index];
if (!f)
@@ -697,11 +693,7 @@ tcp_enqueue_to_output_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
/* Decide where to send the packet */
next_index = is_ip4 ? tcp4_output_node.index : tcp6_output_node.index;
- if (VLIB_BUFFER_TRACE_TRAJECTORY > 0)
- {
- b->pre_data[0] = 1;
- b->pre_data[1] = next_index;
- }
+ tcp_trajectory_add_start (b, 2);
/* Get frame to v4/6 output node */
f = tm->tx_frames[!is_ip4][thread_index];
@@ -791,6 +783,7 @@ tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b0,
}
tcp_reuse_buffer (vm, b0);
+ tcp_trajectory_add_start (b0, 4);
th0 = vlib_buffer_push_tcp_net_order (b0, dst_port, src_port, seq, ack,
sizeof (tcp_header_t), flags, 0);
@@ -977,6 +970,14 @@ tcp_send_syn (tcp_connection_t * tc)
tcp_main_t *tm = vnet_get_tcp_main ();
vlib_main_t *vm = vlib_get_main ();
+ /*
+ * Set up retransmit and establish timers before requesting a buffer
+ * such that we can return if we've run out.
+ */
+ tcp_timer_set (tc, TCP_TIMER_ESTABLISH, TCP_ESTABLISH_TIME);
+ tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN,
+ tc->rto * TCP_TO_TIMER_TICK);
+
if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
return;
@@ -989,9 +990,6 @@ tcp_send_syn (tcp_connection_t * tc)
tc->rtt_seq = tc->snd_nxt;
tc->rto_boff = 0;
- /* Set the connection establishment timer */
- tcp_timer_set (tc, TCP_TIMER_ESTABLISH, TCP_ESTABLISH_TIME);
-
tcp_push_ip_hdr (tm, tc, b);
tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4);
TCP_EVT_DBG (TCP_EVT_SYN_SENT, tc);
@@ -1055,6 +1053,7 @@ tcp_send_fin (tcp_connection_t * tc)
u32 bi;
u8 fin_snt = 0;
+ tcp_retransmit_timer_force_update (tc);
if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
return;
b = vlib_get_buffer (vm, bi);
@@ -1075,7 +1074,6 @@ tcp_send_fin (tcp_connection_t * tc)
{
tc->snd_nxt = tc->snd_una_max;
}
- tcp_retransmit_timer_force_update (tc);
TCP_EVT_DBG (TCP_EVT_FIN_SENT, tc);
}
@@ -1378,9 +1376,11 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
if (tc->state >= TCP_STATE_ESTABLISHED)
{
/* Lost FIN, retransmit and return */
- if (tcp_is_lost_fin (tc))
+ if (tc->state == TCP_STATE_FIN_WAIT_1)
{
tcp_send_fin (tc);
+ tc->rto_boff += 1;
+ tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
return;
}
@@ -1419,12 +1419,6 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
if (n_bytes == 0)
{
- ASSERT (!b);
- if (tc->snd_una == tc->snd_una_max)
- return;
- ASSERT (tc->rto_boff > 1 && tc->snd_una == tc->snd_congestion);
- clib_warning ("retransmit fail: %U", format_tcp_connection, tc, 2);
- /* Try again eventually */
tcp_retransmit_timer_set (tc);
return;
}
@@ -1460,6 +1454,9 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
+ tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN,
+ tc->rto * TCP_TO_TIMER_TICK);
+
if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
return;
@@ -1483,7 +1480,10 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
tc->rtt_ts = 0;
if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
- return;
+ {
+ tcp_retransmit_timer_force_update (tc);
+ return;
+ }
b = vlib_get_buffer (vm, bi);
tcp_make_synack (tc, b);
@@ -2037,7 +2037,7 @@ tcp46_send_reset_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
/* Prepare to send to IP lookup */
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = 0;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
next0 = TCP_RESET_NEXT_IP_LOOKUP;
done: