summaryrefslogtreecommitdiffstats
path: root/src/vnet/tcp/tcp_input.c
diff options
context:
space:
mode:
authorFlorin Coras <fcoras@cisco.com>2019-02-16 20:47:32 -0800
committerDamjan Marion <dmarion@me.com>2019-02-18 20:43:33 +0000
commit222e1f4160a5828bb2b5bf62716cd76664f6100b (patch)
treeeeb8b47fa94dc23152f1652e9dbd14c384d910ff /src/vnet/tcp/tcp_input.c
parenta333795d1c425877645754a384af47755a80712e (diff)
tcp: harden for high scale scenarios
- Better handle buffer starvation scenarios - Handle case when both peers enter recovery due to packet loss. - Fix passive open establish cleanup Change-Id: I2f28baa2ff0383bb8f5f6d2452b49aa38ce69bce Signed-off-by: Florin Coras <fcoras@cisco.com>
Diffstat (limited to 'src/vnet/tcp/tcp_input.c')
-rw-r--r--src/vnet/tcp/tcp_input.c44
1 files changed, 32 insertions, 12 deletions
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index 6514dca4b4a..3d8ae890372 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -350,20 +350,34 @@ tcp_segment_validate (tcp_worker_ctx_t * wrk, tcp_connection_t * tc0,
goto error;
}
- *error0 = TCP_ERROR_RCV_WND;
/* If our window is 0 and the packet is in sequence, let it pass
* through for ack processing. It should be dropped later. */
- if (!(tc0->rcv_wnd == 0
- && tc0->rcv_nxt == vnet_buffer (b0)->tcp.seq_number))
+ if (tc0->rcv_wnd == 0
+ && tc0->rcv_nxt == vnet_buffer (b0)->tcp.seq_number)
+ goto check_reset;
+
+ /* If we entered recovery and peer did so as well, there's a chance that
+ * dup acks won't be acceptable on either end because seq_end may be less
+ * than rcv_las. This can happen if acks are lost in both directions. */
+ if (tcp_in_recovery (tc0)
+ && seq_geq (vnet_buffer (b0)->tcp.seq_number,
+ tc0->rcv_las - tc0->rcv_wnd)
+ && seq_leq (vnet_buffer (b0)->tcp.seq_end,
+ tc0->rcv_nxt + tc0->rcv_wnd))
+ goto check_reset;
+
+ *error0 = TCP_ERROR_RCV_WND;
+
+ /* If not RST, send dup ack */
+ if (!tcp_rst (th0))
{
- /* If not RST, send dup ack */
- if (!tcp_rst (th0))
- {
- tcp_program_dupack (wrk, tc0);
- TCP_EVT_DBG (TCP_EVT_DUPACK_SENT, tc0, vnet_buffer (b0)->tcp);
- }
- goto error;
+ tcp_program_dupack (wrk, tc0);
+ TCP_EVT_DBG (TCP_EVT_DUPACK_SENT, tc0, vnet_buffer (b0)->tcp);
}
+ goto error;
+
+ check_reset:
+ ;
}
/* 2nd: check the RST bit */
@@ -507,6 +521,7 @@ tcp_estimate_initial_rtt (tcp_connection_t * tc)
if (tc->rtt_ts)
{
tc->mrtt_us = tcp_time_now_us (thread_index) - tc->rtt_ts;
+ tc->mrtt_us = clib_max (tc->mrtt_us, 0.0001);
mrtt = clib_max ((u32) (tc->mrtt_us * THZ), 1);
tc->rtt_ts = 0;
}
@@ -514,6 +529,9 @@ tcp_estimate_initial_rtt (tcp_connection_t * tc)
{
mrtt = tcp_time_now_w_thread (thread_index) - tc->rcv_opts.tsecr;
mrtt = clib_max (mrtt, 1);
+ /* Due to retransmits we don't know the initial mrtt */
+ if (tc->rto_boff && mrtt > 1 * THZ)
+ mrtt = 1 * THZ;
tc->mrtt_us = (f64) mrtt *TCP_TICK;
}
@@ -2360,6 +2378,7 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
/* Make sure the connection actually exists */
ASSERT (tcp_lookup_connection (tc0->c_fib_index, b0,
my_thread_index, is_ip4));
+ error0 = TCP_ERROR_SPURIOUS_SYN_ACK;
goto drop;
}
@@ -2441,7 +2460,6 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
/* No SYN flag. Drop. */
if (!tcp_syn (tcp0))
{
- clib_warning ("not synack");
error0 = TCP_ERROR_SEGMENT_INVALID;
goto drop;
}
@@ -2449,7 +2467,6 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
/* Parse options */
if (tcp_options_parse (tcp0, &tc0->rcv_opts, 1))
{
- clib_warning ("options parse fail");
error0 = TCP_ERROR_OPTIONS;
goto drop;
}
@@ -3676,6 +3693,9 @@ do { \
_(SYN_SENT, TCP_FLAG_RST, TCP_INPUT_NEXT_SYN_SENT, TCP_ERROR_NONE);
_(SYN_SENT, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT,
TCP_ERROR_NONE);
+ _(SYN_SENT, TCP_FLAG_FIN, TCP_INPUT_NEXT_SYN_SENT, TCP_ERROR_NONE);
+ _(SYN_SENT, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT,
+ TCP_ERROR_NONE);
/* ACK for for established connection -> tcp-established. */
_(ESTABLISHED, TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE);
/* FIN for for established connection -> tcp-established. */