summaryrefslogtreecommitdiffstats
path: root/src/vnet/tcp
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet/tcp')
-rw-r--r--src/vnet/tcp/tcp.c8
-rw-r--r--src/vnet/tcp/tcp_input.c80
2 files changed, 84 insertions, 4 deletions
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 62267bbdd1f..4825bf62856 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -702,7 +702,13 @@ tcp_init_snd_vars (tcp_connection_t * tc)
tcp_update_time_now (tcp_get_worker (vlib_get_thread_index ()));
tcp_init_rcv_mss (tc);
- tc->iss = tcp_generate_random_iss (tc);
+ /*
+ * In special case of early-kill of timewait socket, the iss will already
+ * be initialized to ensure it is greater than the last incarnation of the
+ * connection. see syn_during_timewait() for more details.
+ */
+ if (!tc->iss)
+ tc->iss = tcp_generate_random_iss (tc);
tc->snd_una = tc->iss;
tc->snd_nxt = tc->iss + 1;
tc->srtt = 0.1 * THZ; /* 100 ms */
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index 6546a1422c1..df31c9e775f 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -2587,6 +2587,61 @@ tcp46_listen_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
}
/**
+ * SYN received in TIME-WAIT state.
+ *
+ * RFC 1122:
+ * "When a connection is [...] on TIME-WAIT state [...]
+ * [a TCP] MAY accept a new SYN from the remote TCP to
+ * reopen the connection directly, if it:
+ *
+ * (1) assigns its initial sequence number for the new
+ * connection to be larger than the largest sequence
+ * number it used on the previous connection incarnation,
+ * and
+ *
+ * (2) returns to TIME-WAIT state if the SYN turns out
+ * to be an old duplicate".
+ *
+ * The function returns true if the syn can be accepted during
+ * connection time-wait (port reuse). In this case the function
+ * also calculates what the iss should be for the new connection.
+ */
+always_inline int
+syn_during_timewait (tcp_connection_t *tc, vlib_buffer_t *b, u32 *iss)
+{
+ int paws_reject = tcp_segment_check_paws (tc);
+ u32 tw_iss;
+
+ *iss = 0;
+ /* Check that the SYN arrived out of window. We accept it */
+ if (!paws_reject &&
+ (seq_geq (vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt) ||
+ (tcp_opts_tstamp (&tc->rcv_opts) &&
+ timestamp_lt (tc->tsval_recent, tc->rcv_opts.tsval))))
+ {
+ /* Set the iss of the new connection to be the largest sequence number
+ * the old peer would have accepted and add some random number
+ */
+ tw_iss = tc->snd_nxt + tcp_available_snd_wnd (tc) +
+ (uword) (tcp_time_now_us (tc->c_thread_index) * 1e6) % 65535;
+ if (tw_iss == 0)
+ tw_iss++;
+ *iss = tw_iss;
+
+ return 1;
+ }
+ else
+ {
+ TCP_DBG (
+ "ERROR not accepting SYN in timewait,paws_reject=%d, seq_num =%ld, "
+ "rcv_nxt=%ld, tstamp_present=%d, tsval_recent = %d, tsval = %d\n",
+ paws_reject, vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt,
+ tcp_opts_tstamp (&tc->rcv_opts), tc->tsval_recent, tc->rcv_opts.tsval);
+ return 0;
+ }
+}
+
+/**
* LISTEN state processing as per RFC 793 p. 65
*/
always_inline uword
@@ -2596,6 +2651,7 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 n_left_from, *from, n_syns = 0;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u32 thread_index = vm->thread_index;
+ u32 tw_iss = 0;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -2616,7 +2672,7 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
lc = tcp_listener_get (vnet_buffer (b[0])->tcp.connection_index);
}
- else
+ else /* We are in TimeWait state*/
{
tcp_connection_t *tc;
tc = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
@@ -2626,6 +2682,14 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
error = TCP_ERROR_CREATE_EXISTS;
goto done;
}
+
+ if (PREDICT_FALSE (!syn_during_timewait (tc, b[0], &tw_iss)))
+ {
+ /* This SYN can't be accepted */
+ error = TCP_ERROR_CREATE_EXISTS;
+ goto done;
+ }
+
lc = tcp_lookup_listener (b[0], tc->c_fib_index, is_ip4);
/* clean up the old session */
tcp_connection_del (tc);
@@ -2669,6 +2733,12 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
child->state = TCP_STATE_SYN_RCVD;
child->c_fib_index = lc->c_fib_index;
child->cc_algo = lc->cc_algo;
+
+ /* In the regular case, the tw_iss will be zero, but
+ * in the special case of syn arriving in time_wait state, the value
+ * will be set according to rfc 1122
+ */
+ child->iss = tw_iss;
tcp_connection_init_vars (child);
child->rto = TCP_RTO_MIN;
@@ -2843,8 +2913,12 @@ tcp_input_dispatch_buffer (tcp_main_t * tm, tcp_connection_t * tc,
error = tm->dispatch_table[tc->state][flags].error;
tc->segs_in += 1;
- /* Track connection state when packet was received. It helps
- * @ref tcp46_listen_inline detect port reuse */
+ /* Track connection state when packet was received. It is required
+ * for @ref tcp46_listen_inline to detect whether we reached
+ * the node as a result of a SYN packet received while in time-wait
+ * state. In this case the connection_index in vnet buffer will point
+ * to the existing tcp connection and not the listener
+ */
vnet_buffer (b)->tcp.flags = tc->state;
if (PREDICT_FALSE (error != TCP_ERROR_NONE))