aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Michal Kalderon <mkalderon@marvell.com> 2021-08-08 04:30:39 -0700
committer Florin Coras <florin.coras@gmail.com> 2021-10-25 15:46:23 +0000
commit 3effadc66ff8acb2e5c83645faec411de59332c2 (patch)
tree 8fe4e24bb21f471072a48f795f0efd8e0bc2e853 /src
parent 3265ec8cb12d94e91a38ac377e78970eb032d88c (diff)
tcp: fix: TCP timewait port reuse rfc compliance
This patch provides a fix for early-kill of timewait sockets that is based on RFCs 1122 and 6191. The following commits provided a solution for port re-use; however, they are not fully compliant with RFC 1122 section 4.2.2.13 (Closing a Connection) and RFC 6191 (Reducing the TIME-WAIT State Using TCP Timestamps). Commit b092b77cf238ba ("tcp: Enable TCP timewait port use") introduced a significant improvement by enabling TCP timewait port re-use. Commit ee1cb469b2dd ("tcp: fix port reuse with multiple listeners") fixed usage of the wrong value for connection_index when searching for a listener, by storing the state in tcp.flags. Implementation details: When a SYN is received during time-wait state, the code checks whether all the requirements for accepting the SYN packet are met. If they aren't, the SYN can't be accepted and the packet is dropped; otherwise, the connection is deleted and a new connection with the same port is opened. Type: fix Signed-off-by: Ofer Heifetz <oferh@marvell.com> Signed-off-by: Yuval Caduri <cyuval@marvell.com> Signed-off-by: Michal Kalderon <mkalderon@marvell.com> Change-Id: I38a33c6e321c760d45ebec9154399e1c90dd0250
Diffstat (limited to 'src')
-rw-r--r-- src/vnet/tcp/tcp.c 8
-rw-r--r-- src/vnet/tcp/tcp_input.c 80
2 files changed, 84 insertions, 4 deletions
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 62267bbdd1f..4825bf62856 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -702,7 +702,13 @@ tcp_init_snd_vars (tcp_connection_t * tc)
tcp_update_time_now (tcp_get_worker (vlib_get_thread_index ()));
tcp_init_rcv_mss (tc);
- tc->iss = tcp_generate_random_iss (tc);
+ /*
+ * In special case of early-kill of timewait socket, the iss will already
+ * be initialized to ensure it is greater than the last incarnation of the
+ * connection. see syn_during_timewait() for more details.
+ */
+ if (!tc->iss)
+ tc->iss = tcp_generate_random_iss (tc);
tc->snd_una = tc->iss;
tc->snd_nxt = tc->iss + 1;
tc->srtt = 0.1 * THZ; /* 100 ms */
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index 6546a1422c1..df31c9e775f 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -2587,6 +2587,61 @@ tcp46_listen_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
}
/**
+ * SYN received in TIME-WAIT state.
+ *
+ * RFC 1122:
+ * "When a connection is [...] on TIME-WAIT state [...]
+ * [a TCP] MAY accept a new SYN from the remote TCP to
+ * reopen the connection directly, if it:
+ *
+ * (1) assigns its initial sequence number for the new
+ * connection to be larger than the largest sequence
+ * number it used on the previous connection incarnation,
+ * and
+ *
+ * (2) returns to TIME-WAIT state if the SYN turns out
+ * to be an old duplicate".
+ *
+ * Returns 1 and stores a non-zero ISS for the new connection in *iss
+ * when the SYN may be accepted during time-wait (port reuse).
+ * Returns 0 and leaves *iss set to 0 when the SYN must not be accepted.
+ */
+always_inline int
+syn_during_timewait (tcp_connection_t *tc, vlib_buffer_t *b, u32 *iss)
+{
+ int paws_reject = tcp_segment_check_paws (tc);
+ u32 tw_iss;
+
+ *iss = 0; /* default: no ISS selected */
+ /* Accept if PAWS passes and (seq >= rcv_nxt or tsval_recent < tsval) */
+ if (!paws_reject &&
+ (seq_geq (vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt) ||
+ (tcp_opts_tstamp (&tc->rcv_opts) &&
+ timestamp_lt (tc->tsval_recent, tc->rcv_opts.tsval))))
+ {
+ /* Start from snd_nxt plus the available send window of the old
+ * connection, then add a time-derived offset smaller than 65535
+ */
+ tw_iss = tc->snd_nxt + tcp_available_snd_wnd (tc) +
+ (uword) (tcp_time_now_us (tc->c_thread_index) * 1e6) % 65535;
+ if (tw_iss == 0) /* zero iss means "unset" to tcp_init_snd_vars */
+ tw_iss++;
+ *iss = tw_iss;
+
+ return 1;
+ }
+ else
+ {
+ TCP_DBG ( /* NOTE(review): confirm %ld/%d match the field widths */
+ "ERROR not accepting SYN in timewait,paws_reject=%d, seq_num =%ld, "
+ "rcv_nxt=%ld, tstamp_present=%d, tsval_recent = %d, tsval = %d\n",
+ paws_reject, vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt,
+ tcp_opts_tstamp (&tc->rcv_opts), tc->tsval_recent, tc->rcv_opts.tsval);
+ return 0;
+ }
+}
+
+/**
* LISTEN state processing as per RFC 793 p. 65
*/
always_inline uword
@@ -2596,6 +2651,7 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 n_left_from, *from, n_syns = 0;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u32 thread_index = vm->thread_index;
+ u32 tw_iss = 0;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -2616,7 +2672,7 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
lc = tcp_listener_get (vnet_buffer (b[0])->tcp.connection_index);
}
- else
+ else /* We are in TimeWait state*/
{
tcp_connection_t *tc;
tc = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
@@ -2626,6 +2682,14 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
error = TCP_ERROR_CREATE_EXISTS;
goto done;
}
+
+ if (PREDICT_FALSE (!syn_during_timewait (tc, b[0], &tw_iss)))
+ {
+ /* This SYN can't be accepted */
+ error = TCP_ERROR_CREATE_EXISTS;
+ goto done;
+ }
+
lc = tcp_lookup_listener (b[0], tc->c_fib_index, is_ip4);
/* clean up the old session */
tcp_connection_del (tc);
@@ -2669,6 +2733,12 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
child->state = TCP_STATE_SYN_RCVD;
child->c_fib_index = lc->c_fib_index;
child->cc_algo = lc->cc_algo;
+
+ /* In the regular case, the tw_iss will be zero, but
+ * in the special case of syn arriving in time_wait state, the value
+ * will be set according to rfc 1122
+ */
+ child->iss = tw_iss;
tcp_connection_init_vars (child);
child->rto = TCP_RTO_MIN;
@@ -2843,8 +2913,12 @@ tcp_input_dispatch_buffer (tcp_main_t * tm, tcp_connection_t * tc,
error = tm->dispatch_table[tc->state][flags].error;
tc->segs_in += 1;
- /* Track connection state when packet was received. It helps
- * @ref tcp46_listen_inline detect port reuse */
+ /* Track connection state when packet was received. It is required
+ * for @ref tcp46_listen_inline to detect whether we reached
+ * the node as a result of a SYN packet received while in time-wait
+ * state. In this case the connection_index in vnet buffer will point
+ * to the existing tcp connection and not the listener
+ */
vnet_buffer (b)->tcp.flags = tc->state;
if (PREDICT_FALSE (error != TCP_ERROR_NONE))