From db84e579ef77476e3c73780e20243ee1799530f3 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Tue, 9 May 2017 18:54:52 -0700 Subject: Improve TCP option handling, VPP-757 Change-Id: Ica634536387d1196366ec96c52770287fcab0768 Signed-off-by: Florin Coras --- src/vnet/tcp/tcp.c | 6 +++++- src/vnet/tcp/tcp_input.c | 47 +++++++++++++++++++++++++++++++++++++---------- src/vnet/tcp/tcp_output.c | 16 ++++++++++++---- 3 files changed, 54 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 224ee0dd160..a65ab7ffac4 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -154,6 +154,10 @@ tcp_connection_reset (tcp_connection_t * tc) return; tc->state = TCP_STATE_CLOSED; + + /* Make sure all timers are cleared */ + tcp_connection_timers_reset (tc); + stream_session_reset_notify (&tc->connection); } @@ -585,7 +589,7 @@ tcp_round_snd_space (tcp_connection_t * tc, u32 snd_space) { if (tc->snd_wnd < tc->snd_mss) { - return tc->snd_wnd < snd_space ? tc->snd_wnd : 0; + return tc->snd_wnd <= snd_space ? tc->snd_wnd : 0; } /* If we can't write at least a segment, don't try at all */ diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 82e676d4654..318d8ec5c89 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -112,7 +112,14 @@ tcp_segment_in_rcv_wnd (tcp_connection_t * tc, u32 seq, u32 end_seq) && seq_leq (seq, tc->rcv_nxt + tc->rcv_wnd)); } -void +/** + * Parse TCP header options. + * + * @param th TCP header + * @param to TCP options data structure to be populated + * @return -1 if parsing failed + */ +int tcp_options_parse (tcp_header_t * th, tcp_options_t * to) { const u8 *data; @@ -134,17 +141,20 @@ tcp_options_parse (tcp_header_t * th, tcp_options_t * to) if (kind == TCP_OPTION_EOL) break; else if (kind == TCP_OPTION_NOOP) - opt_len = 1; + { + opt_len = 1; + continue; + } else { /* broken options */ if (opts_len < 2) - break; + return -1; opt_len = data[1]; /* weird option length */ if (opt_len < 2 || opt_len > opts_len) - break; + return -1; } /* Parse options */ @@ -206,6 +216,7 @@ tcp_options_parse (tcp_header_t * th, tcp_options_t * to) continue; } } + return 0; } /** @@ -261,7 +272,10 @@ tcp_segment_validate (vlib_main_t * vm, tcp_connection_t * tc0, if (PREDICT_FALSE (!tcp_ack (th0) && !tcp_rst (th0) && !tcp_syn (th0))) return -1; - tcp_options_parse (th0, &tc0->opt); + if (PREDICT_FALSE (tcp_options_parse (th0, &tc0->opt))) + { + return -1; + } if (tcp_segment_check_paws (tc0)) { @@ -1109,19 +1123,24 @@ static int tcp_segment_rcv (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b, u16 n_data_bytes, u32 * next0) { - u32 error = 0; + u32 error = 0, n_bytes_to_drop; /* Handle out-of-order data */ if (PREDICT_FALSE (vnet_buffer (b)->tcp.seq_number != tc->rcv_nxt)) { /* Old sequence numbers allowed through because they overlapped * the rx window */ - if (seq_lt (vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt)) { error = TCP_ERROR_SEGMENT_OLD; *next0 = TCP_NEXT_DROP; - goto done; + + /* Chop off the bytes in the past */ + n_bytes_to_drop = tc->rcv_nxt - vnet_buffer (b)->tcp.seq_number; + n_data_bytes -= n_bytes_to_drop; + vlib_buffer_advance (b, n_bytes_to_drop); + + goto in_order; } error = tcp_session_enqueue_ooo (tc, b, n_data_bytes); @@ -1145,6 +1164,8 @@ tcp_segment_rcv (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b, goto done; } +in_order: + /* In order data, enqueue. Fifo figures out by itself if any out-of-order * segments can be enqueued after fifo tail offset changes. */ error = tcp_session_enqueue_data (tc, b, n_data_bytes); @@ -1540,7 +1561,8 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, new_tc0->irs = seq0; /* Parse options */ - tcp_options_parse (tcp0, &new_tc0->opt); + if (tcp_options_parse (tcp0, &new_tc0->opt)) + goto drop; if (tcp_opts_tstamp (&new_tc0->opt)) { @@ -1943,6 +1965,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, case TCP_STATE_FIN_WAIT_2: /* Got FIN, send ACK! */ tc0->state = TCP_STATE_TIME_WAIT; + tcp_connection_timers_reset (tc0); tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME); tcp_make_ack (tc0, b0); next0 = tcp_next_output (is_ip4); @@ -2149,7 +2172,10 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node, goto drop; } - tcp_options_parse (th0, &child0->opt); + if (tcp_options_parse (th0, &child0->opt)) + { + goto drop; + } child0->irs = vnet_buffer (b0)->tcp.seq_number; child0->rcv_nxt = vnet_buffer (b0)->tcp.seq_number + 1; @@ -2553,6 +2579,7 @@ do { \ _(FIN_WAIT_2, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); _(LAST_ACK, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + _(LAST_ACK, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); _(CLOSED, TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET, TCP_ERROR_CONNECTION_CLOSED); _(CLOSED, TCP_FLAG_RST, TCP_INPUT_NEXT_DROP, TCP_ERROR_CONNECTION_CLOSED); #undef _ diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 39891fc3fde..a462d8dac3d 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -396,20 +396,24 @@ tcp_update_snd_mss (tcp_connection_t * tc) /* XXX check if MTU has been updated */ tc->snd_mss = clib_min (tc->mss, tc->opt.mss) - tc->snd_opts_len; + ASSERT (tc->snd_mss > 0); } void tcp_init_mss (tcp_connection_t * tc) { + u16 default_min_mss = 536; tcp_update_rcv_mss (tc); /* TODO cache mss and consider PMTU discovery */ tc->snd_mss = clib_min (tc->opt.mss, tc->mss); - if (tc->snd_mss == 0) + if (tc->snd_mss < 45) { clib_warning ("snd mss is 0"); - tc->snd_mss = tc->mss; + /* Assume that at least the min default mss works */ + tc->snd_mss = default_min_mss; + tc->opt.mss = default_min_mss; } /* We should have enough space for 40 bytes of options */ @@ -1171,13 +1175,17 @@ tcp_timer_persist_handler (u32 index) vlib_buffer_t *b; u32 bi, n_bytes; - tc = tcp_connection_get (index, thread_index); + tc = tcp_connection_get_if_valid (index, thread_index); + + if (!tc) + return; /* Make sure timer handle is set to invalid */ tc->timers[TCP_TIMER_PERSIST] = TCP_TIMER_HANDLE_INVALID; /* Problem already solved or worse */ - if (tc->snd_wnd > tc->snd_mss || tcp_in_recovery (tc)) + if (tc->state == TCP_STATE_CLOSED + || tc->snd_wnd > tc->snd_mss || tcp_in_recovery (tc)) return; /* Increment RTO backoff */ -- cgit 1.2.3-korg