From 222e1f4160a5828bb2b5bf62716cd76664f6100b Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Sat, 16 Feb 2019 20:47:32 -0800 Subject: tcp: harden for high scale scenarios - Better handle buffer starvation scenarios - Handle case when both peers enter recovery due to packet loss. - Fix passive open establish cleanup Change-Id: I2f28baa2ff0383bb8f5f6d2452b49aa38ce69bce Signed-off-by: Florin Coras --- src/vnet/tcp/tcp_input.c | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) (limited to 'src/vnet/tcp/tcp_input.c') diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 6514dca4b4a..3d8ae890372 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -350,20 +350,34 @@ tcp_segment_validate (tcp_worker_ctx_t * wrk, tcp_connection_t * tc0, goto error; } - *error0 = TCP_ERROR_RCV_WND; /* If our window is 0 and the packet is in sequence, let it pass * through for ack processing. It should be dropped later. */ - if (!(tc0->rcv_wnd == 0 - && tc0->rcv_nxt == vnet_buffer (b0)->tcp.seq_number)) + if (tc0->rcv_wnd == 0 + && tc0->rcv_nxt == vnet_buffer (b0)->tcp.seq_number) + goto check_reset; + + /* If we entered recovery and peer did so as well, there's a chance that + * dup acks won't be acceptable on either end because seq_end may be less + * than rcv_las. This can happen if acks are lost in both directions. */ + if (tcp_in_recovery (tc0) + && seq_geq (vnet_buffer (b0)->tcp.seq_number, + tc0->rcv_las - tc0->rcv_wnd) + && seq_leq (vnet_buffer (b0)->tcp.seq_end, + tc0->rcv_nxt + tc0->rcv_wnd)) + goto check_reset; + + *error0 = TCP_ERROR_RCV_WND; + + /* If not RST, send dup ack */ + if (!tcp_rst (th0)) { - /* If not RST, send dup ack */ - if (!tcp_rst (th0)) - { - tcp_program_dupack (wrk, tc0); - TCP_EVT_DBG (TCP_EVT_DUPACK_SENT, tc0, vnet_buffer (b0)->tcp); - } - goto error; + tcp_program_dupack (wrk, tc0); + TCP_EVT_DBG (TCP_EVT_DUPACK_SENT, tc0, vnet_buffer (b0)->tcp); } + goto error; + + check_reset: + ; } /* 2nd: check the RST bit */ @@ -507,6 +521,7 @@ tcp_estimate_initial_rtt (tcp_connection_t * tc) if (tc->rtt_ts) { tc->mrtt_us = tcp_time_now_us (thread_index) - tc->rtt_ts; + tc->mrtt_us = clib_max (tc->mrtt_us, 0.0001); mrtt = clib_max ((u32) (tc->mrtt_us * THZ), 1); tc->rtt_ts = 0; } @@ -514,6 +529,9 @@ tcp_estimate_initial_rtt (tcp_connection_t * tc) { mrtt = tcp_time_now_w_thread (thread_index) - tc->rcv_opts.tsecr; mrtt = clib_max (mrtt, 1); + /* Due to retransmits we don't know the initial mrtt */ + if (tc->rto_boff && mrtt > 1 * THZ) + mrtt = 1 * THZ; tc->mrtt_us = (f64) mrtt *TCP_TICK; } @@ -2360,6 +2378,7 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* Make sure the connection actually exists */ ASSERT (tcp_lookup_connection (tc0->c_fib_index, b0, my_thread_index, is_ip4)); + error0 = TCP_ERROR_SPURIOUS_SYN_ACK; goto drop; } @@ -2441,7 +2460,6 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* No SYN flag. Drop. */ if (!tcp_syn (tcp0)) { - clib_warning ("not synack"); error0 = TCP_ERROR_SEGMENT_INVALID; goto drop; } @@ -2449,7 +2467,6 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* Parse options */ if (tcp_options_parse (tcp0, &tc0->rcv_opts, 1)) { - clib_warning ("options parse fail"); error0 = TCP_ERROR_OPTIONS; goto drop; } @@ -3676,6 +3693,9 @@ do { \ _(SYN_SENT, TCP_FLAG_RST, TCP_INPUT_NEXT_SYN_SENT, TCP_ERROR_NONE); _(SYN_SENT, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT, TCP_ERROR_NONE); + _(SYN_SENT, TCP_FLAG_FIN, TCP_INPUT_NEXT_SYN_SENT, TCP_ERROR_NONE); + _(SYN_SENT, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT, + TCP_ERROR_NONE); /* ACK for for established connection -> tcp-established. */ _(ESTABLISHED, TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE); /* FIN for for established connection -> tcp-established. */ -- cgit 1.2.3-korg