aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFlorin Coras <fcoras@cisco.com>2017-05-09 18:54:52 -0700
committerDave Barach <openvpp@barachs.net>2017-05-10 14:02:51 +0000
commitdb84e579ef77476e3c73780e20243ee1799530f3 (patch)
treef475a8d9466729c3cb8902e9b4ad96b201e91567
parent1015a1ef1355b1160dcf2a3c0cbe0cfe340be653 (diff)
Improve TCP option handling, VPP-757
Change-Id: Ica634536387d1196366ec96c52770287fcab0768 Signed-off-by: Florin Coras <fcoras@cisco.com>
-rw-r--r--src/vnet/tcp/tcp.c6
-rw-r--r--src/vnet/tcp/tcp_input.c47
-rw-r--r--src/vnet/tcp/tcp_output.c16
3 files changed, 54 insertions, 15 deletions
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 224ee0dd160..a65ab7ffac4 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -154,6 +154,10 @@ tcp_connection_reset (tcp_connection_t * tc)
return;
tc->state = TCP_STATE_CLOSED;
+
+ /* Make sure all timers are cleared */
+ tcp_connection_timers_reset (tc);
+
stream_session_reset_notify (&tc->connection);
}
@@ -585,7 +589,7 @@ tcp_round_snd_space (tcp_connection_t * tc, u32 snd_space)
{
if (tc->snd_wnd < tc->snd_mss)
{
- return tc->snd_wnd < snd_space ? tc->snd_wnd : 0;
+ return tc->snd_wnd <= snd_space ? tc->snd_wnd : 0;
}
/* If we can't write at least a segment, don't try at all */
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index 82e676d4654..318d8ec5c89 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -112,7 +112,14 @@ tcp_segment_in_rcv_wnd (tcp_connection_t * tc, u32 seq, u32 end_seq)
&& seq_leq (seq, tc->rcv_nxt + tc->rcv_wnd));
}
-void
+/**
+ * Parse TCP header options.
+ *
+ * @param th TCP header
+ * @param to TCP options data structure to be populated
+ * @return -1 if parsing failed
+ */
+int
tcp_options_parse (tcp_header_t * th, tcp_options_t * to)
{
const u8 *data;
@@ -134,17 +141,20 @@ tcp_options_parse (tcp_header_t * th, tcp_options_t * to)
if (kind == TCP_OPTION_EOL)
break;
else if (kind == TCP_OPTION_NOOP)
- opt_len = 1;
+ {
+ opt_len = 1;
+ continue;
+ }
else
{
/* broken options */
if (opts_len < 2)
- break;
+ return -1;
opt_len = data[1];
/* weird option length */
if (opt_len < 2 || opt_len > opts_len)
- break;
+ return -1;
}
/* Parse options */
@@ -206,6 +216,7 @@ tcp_options_parse (tcp_header_t * th, tcp_options_t * to)
continue;
}
}
+ return 0;
}
/**
@@ -261,7 +272,10 @@ tcp_segment_validate (vlib_main_t * vm, tcp_connection_t * tc0,
if (PREDICT_FALSE (!tcp_ack (th0) && !tcp_rst (th0) && !tcp_syn (th0)))
return -1;
- tcp_options_parse (th0, &tc0->opt);
+ if (PREDICT_FALSE (tcp_options_parse (th0, &tc0->opt)))
+ {
+ return -1;
+ }
if (tcp_segment_check_paws (tc0))
{
@@ -1109,19 +1123,24 @@ static int
tcp_segment_rcv (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b,
u16 n_data_bytes, u32 * next0)
{
- u32 error = 0;
+ u32 error = 0, n_bytes_to_drop;
/* Handle out-of-order data */
if (PREDICT_FALSE (vnet_buffer (b)->tcp.seq_number != tc->rcv_nxt))
{
/* Old sequence numbers allowed through because they overlapped
* the rx window */
-
if (seq_lt (vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt))
{
error = TCP_ERROR_SEGMENT_OLD;
*next0 = TCP_NEXT_DROP;
- goto done;
+
+ /* Chop off the bytes in the past */
+ n_bytes_to_drop = tc->rcv_nxt - vnet_buffer (b)->tcp.seq_number;
+ n_data_bytes -= n_bytes_to_drop;
+ vlib_buffer_advance (b, n_bytes_to_drop);
+
+ goto in_order;
}
error = tcp_session_enqueue_ooo (tc, b, n_data_bytes);
@@ -1145,6 +1164,8 @@ tcp_segment_rcv (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b,
goto done;
}
+in_order:
+
/* In order data, enqueue. Fifo figures out by itself if any out-of-order
* segments can be enqueued after fifo tail offset changes. */
error = tcp_session_enqueue_data (tc, b, n_data_bytes);
@@ -1540,7 +1561,8 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
new_tc0->irs = seq0;
/* Parse options */
- tcp_options_parse (tcp0, &new_tc0->opt);
+ if (tcp_options_parse (tcp0, &new_tc0->opt))
+ goto drop;
if (tcp_opts_tstamp (&new_tc0->opt))
{
@@ -1943,6 +1965,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
case TCP_STATE_FIN_WAIT_2:
/* Got FIN, send ACK! */
tc0->state = TCP_STATE_TIME_WAIT;
+ tcp_connection_timers_reset (tc0);
tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
tcp_make_ack (tc0, b0);
next0 = tcp_next_output (is_ip4);
@@ -2149,7 +2172,10 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
goto drop;
}
- tcp_options_parse (th0, &child0->opt);
+ if (tcp_options_parse (th0, &child0->opt))
+ {
+ goto drop;
+ }
child0->irs = vnet_buffer (b0)->tcp.seq_number;
child0->rcv_nxt = vnet_buffer (b0)->tcp.seq_number + 1;
@@ -2553,6 +2579,7 @@ do { \
_(FIN_WAIT_2, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
TCP_ERROR_NONE);
_(LAST_ACK, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ _(LAST_ACK, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
_(CLOSED, TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET, TCP_ERROR_CONNECTION_CLOSED);
_(CLOSED, TCP_FLAG_RST, TCP_INPUT_NEXT_DROP, TCP_ERROR_CONNECTION_CLOSED);
#undef _
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 39891fc3fde..a462d8dac3d 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -396,20 +396,24 @@ tcp_update_snd_mss (tcp_connection_t * tc)
/* XXX check if MTU has been updated */
tc->snd_mss = clib_min (tc->mss, tc->opt.mss) - tc->snd_opts_len;
+ ASSERT (tc->snd_mss > 0);
}
void
tcp_init_mss (tcp_connection_t * tc)
{
+ u16 default_min_mss = 536;
tcp_update_rcv_mss (tc);
/* TODO cache mss and consider PMTU discovery */
tc->snd_mss = clib_min (tc->opt.mss, tc->mss);
- if (tc->snd_mss == 0)
+ if (tc->snd_mss < 45)
{
clib_warning ("snd mss is 0");
- tc->snd_mss = tc->mss;
+ /* Assume that at least the min default mss works */
+ tc->snd_mss = default_min_mss;
+ tc->opt.mss = default_min_mss;
}
/* We should have enough space for 40 bytes of options */
@@ -1171,13 +1175,17 @@ tcp_timer_persist_handler (u32 index)
vlib_buffer_t *b;
u32 bi, n_bytes;
- tc = tcp_connection_get (index, thread_index);
+ tc = tcp_connection_get_if_valid (index, thread_index);
+
+ if (!tc)
+ return;
/* Make sure timer handle is set to invalid */
tc->timers[TCP_TIMER_PERSIST] = TCP_TIMER_HANDLE_INVALID;
/* Problem already solved or worse */
- if (tc->snd_wnd > tc->snd_mss || tcp_in_recovery (tc))
+ if (tc->state == TCP_STATE_CLOSED
+ || tc->snd_wnd > tc->snd_mss || tcp_in_recovery (tc))
return;
/* Increment RTO backoff */