summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Florin Coras <fcoras@cisco.com> 2017-07-18 05:38:03 -0400
committer Florin Coras <fcoras@cisco.com> 2017-07-21 19:20:09 -0400
commit 6534b7aa13bc5bed15ed87f47bb766405963e9e8 (patch)
tree 999524eff2a5c811ef61e65354e6018c8ae3de33
parent 161c59c75c667ce7a3c1d6173723831dc30e994c (diff)
Improvements to tcp rx path and debugging
- Increment rcv_nxt for fin packets
- Call tcp_segment_rcv only if buffer has data
- Parse rcv opts before deleting half-open connection
- Fix initial rcv_wnd
- Improved event logging

Change-Id: I9b83c04f432c4cec832c480b03e534deff02c3b1
Signed-off-by: Florin Coras <fcoras@cisco.com>
-rw-r--r-- src/vnet/session/node.c 73
-rw-r--r-- src/vnet/session/session.c 38
-rw-r--r-- src/vnet/session/session.h 4
-rwxr-xr-x src/vnet/session/session_api.c 7
-rwxr-xr-x src/vnet/session/session_cli.c 22
-rw-r--r-- src/vnet/tcp/builtin_client.c 11
-rw-r--r-- src/vnet/tcp/builtin_server.c 8
-rw-r--r-- src/vnet/tcp/tcp.c 59
-rw-r--r-- src/vnet/tcp/tcp.h 12
-rwxr-xr-x src/vnet/tcp/tcp_debug.h 246
-rw-r--r-- src/vnet/tcp/tcp_input.c 165
-rw-r--r-- src/vnet/tcp/tcp_output.c 51
-rw-r--r-- src/vnet/tcp/tcp_test.c 99
13 files changed, 612 insertions, 183 deletions
diff --git a/src/vnet/session/node.c b/src/vnet/session/node.c
index 983b78b86f7..8d703b0b302 100644
--- a/src/vnet/session/node.c
+++ b/src/vnet/session/node.c
@@ -443,6 +443,79 @@ dump_thread_0_event_queue (void)
}
}
+static u8
+session_node_cmp_event (session_fifo_event_t * e, svm_fifo_t * f)
+{
+ stream_session_t *s;
+ switch (e->event_type)
+ {
+ case FIFO_EVENT_APP_RX:
+ case FIFO_EVENT_APP_TX:
+ case FIFO_EVENT_BUILTIN_RX:
+ if (e->fifo == f)
+ return 1;
+ break;
+ case FIFO_EVENT_DISCONNECT:
+ break;
+ case FIFO_EVENT_RPC:
+ s = stream_session_get_from_handle (e->session_handle);
+ if (!s)
+ {
+ clib_warning ("session has event but doesn't exist!");
+ break;
+ }
+ if (s->server_rx_fifo == f || s->server_tx_fifo == f)
+ return 1;
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+u8
+session_node_lookup_fifo_event (svm_fifo_t * f, session_fifo_event_t * e)
+{
+ session_manager_main_t *smm = vnet_get_session_manager_main ();
+ unix_shared_memory_queue_t *q;
+ session_fifo_event_t *pending_event_vector, *evt;
+ int i, index, found = 0;
+ i8 *headp;
+ u8 thread_index;
+
+ ASSERT (e);
+ thread_index = f->master_thread_index;
+ /*
+ * Search evt queue
+ */
+ q = smm->vpp_event_queues[thread_index];
+ index = q->head;
+ for (i = 0; i < q->cursize; i++)
+ {
+ headp = (i8 *) (&q->data[0] + q->elsize * index);
+ clib_memcpy (e, headp, q->elsize);
+ found = session_node_cmp_event (e, f);
+ if (found)
+ break;
+ if (++index == q->maxsize)
+ index = 0;
+ }
+ /*
+ * Search pending events vector
+ */
+ pending_event_vector = smm->pending_event_vector[thread_index];
+ vec_foreach (evt, pending_event_vector)
+ {
+ found = session_node_cmp_event (evt, f);
+ if (found)
+ {
+ clib_memcpy (e, evt, sizeof (*evt));
+ break;
+ }
+ }
+ return found;
+}
+
static uword
session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c
index 2c2a27c1af9..09bc00e745a 100644
--- a/src/vnet/session/session.c
+++ b/src/vnet/session/session.c
@@ -32,6 +32,22 @@ static transport_proto_vft_t *tp_vfts;
session_manager_main_t session_manager_main;
+transport_connection_t *
+stream_session_lookup_half_open (transport_connection_t * tc)
+{
+ session_manager_main_t *smm = &session_manager_main;
+ session_kv4_t kv4;
+ int rv;
+ if (tc->is_ip4)
+ {
+ make_v4_ss_kv_from_tc (&kv4, tc);
+ rv = clib_bihash_search_inline_16_8 (&smm->v4_half_open_hash, &kv4);
+ if (rv == 0)
+ return tp_vfts[tc->proto].get_half_open (kv4.value & 0xFFFFFFFFULL);
+ }
+ return 0;
+}
+
/*
* Session lookup key; (src-ip, dst-ip, src-port, dst-port, session-type)
* Value: (owner thread index << 32 | session_index);
@@ -501,7 +517,7 @@ stream_session_create_i (segment_manager_t * sm, transport_connection_t * tc,
tc->s_index = s->session_index;
/* Add to the main lookup table */
- value = (((u64) thread_index) << 32) | (u64) s->session_index;
+ value = stream_session_handle (s);
stream_session_table_add_for_tc (tc, value);
*ret_s = s;
@@ -817,8 +833,18 @@ stream_session_connect_notify (transport_connection_t * tc, u8 sst,
}
/* Notify client */
- app->cb_fns.session_connected_callback (app->index, api_context, new_s,
- is_fail);
+ if (app->cb_fns.session_connected_callback (app->index, api_context, new_s,
+ is_fail))
+ {
+ clib_warning ("failed to notify app");
+ if (!is_fail)
+ stream_session_disconnect (new_s);
+ }
+ else
+ {
+ if (!is_fail)
+ new_s->session_state = SESSION_STATE_READY;
+ }
/* Cleanup session lookup */
stream_session_half_open_table_del (smm, sst, tc);
@@ -862,15 +888,19 @@ void
stream_session_delete (stream_session_t * s)
{
session_manager_main_t *smm = vnet_get_session_manager_main ();
+ int rv;
/* Delete from the main lookup table. */
- stream_session_table_del (smm, s);
+ if ((rv = stream_session_table_del (smm, s)))
+ clib_warning ("hash delete error, rv %d", rv);
/* Cleanup fifo segments */
segment_manager_dealloc_fifos (s->svm_segment_index, s->server_rx_fifo,
s->server_tx_fifo);
pool_put (smm->sessions[s->thread_index], s);
+ if (CLIB_DEBUG)
+ memset (s, 0xFA, sizeof (*s));
}
/**
diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h
index 6069c5740f7..6c6163260f8 100644
--- a/src/vnet/session/session.h
+++ b/src/vnet/session/session.h
@@ -170,6 +170,8 @@ typedef int
extern session_fifo_rx_fn session_tx_fifo_peek_and_snd;
extern session_fifo_rx_fn session_tx_fifo_dequeue_and_snd;
+u8 session_node_lookup_fifo_event (svm_fifo_t * f, session_fifo_event_t * e);
+
struct _session_manager_main
{
/** Lookup tables for established sessions and listeners */
@@ -289,6 +291,8 @@ transport_connection_t *stream_session_lookup_transport6 (ip6_address_t * lcl,
stream_session_t *stream_session_lookup_listener (ip46_address_t * lcl,
u16 lcl_port, u8 proto);
+transport_connection_t
+ * stream_session_lookup_half_open (transport_connection_t * tc);
void stream_session_table_add_for_tc (transport_connection_t * tc, u64 value);
int stream_session_table_del_for_tc (transport_connection_t * tc);
diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c
index 60f764af48a..6bee3e27bdb 100755
--- a/src/vnet/session/session_api.c
+++ b/src/vnet/session/session_api.c
@@ -184,13 +184,6 @@ send_session_connected_callback (u32 app_index, u32 api_context,
}
vl_msg_api_send_shmem (q, (u8 *) & mp);
-
- /* Remove client if connect failed */
- if (!is_fail)
- {
- s->session_state = SESSION_STATE_READY;
- }
-
return 0;
}
diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c
index e8e6f99c742..4d432977356 100755
--- a/src/vnet/session/session_cli.c
+++ b/src/vnet/session/session_cli.c
@@ -19,8 +19,24 @@ u8 *
format_stream_session_fifos (u8 * s, va_list * args)
{
stream_session_t *ss = va_arg (*args, stream_session_t *);
+ int verbose = va_arg (*args, int);
+ session_fifo_event_t _e, *e = &_e;
+ u8 found;
+
s = format (s, " Rx fifo: %U", format_svm_fifo, ss->server_rx_fifo, 1);
+ if (verbose > 2 && ss->server_rx_fifo->has_event)
+ {
+ found = session_node_lookup_fifo_event (ss->server_rx_fifo, e);
+ s = format (s, " session node event: %s\n",
+ found ? "found" : "not found");
+ }
s = format (s, " Tx fifo: %U", format_svm_fifo, ss->server_tx_fifo, 1);
+ if (verbose > 2 && ss->server_tx_fifo->has_event)
+ {
+ found = session_node_lookup_fifo_event (ss->server_tx_fifo, e);
+ s = format (s, " session node event: %s\n",
+ found ? "found" : "not found");
+ }
return s;
}
@@ -55,7 +71,7 @@ format_stream_session (u8 * s, va_list * args)
if (verbose == 1)
s = format (s, "%v", str);
if (verbose > 1)
- s = format (s, "%U", format_stream_session_fifos, ss);
+ s = format (s, "%U", format_stream_session_fifos, ss, verbose);
}
else if (ss->session_state == SESSION_STATE_LISTENING)
{
@@ -75,7 +91,7 @@ format_stream_session (u8 * s, va_list * args)
if (verbose == 1)
s = format (s, "%v", str);
if (verbose > 1)
- s = format (s, "%U", format_stream_session_fifos, ss);
+ s = format (s, "%U", format_stream_session_fifos, ss, verbose);
}
else
{
@@ -248,7 +264,7 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (one_session)
{
- vlib_cli_output (vm, "%U", format_stream_session, s, 2);
+ vlib_cli_output (vm, "%U", format_stream_session, s, 3);
return 0;
}
diff --git a/src/vnet/tcp/builtin_client.c b/src/vnet/tcp/builtin_client.c
index a92bacaa37d..744f50e7db2 100644
--- a/src/vnet/tcp/builtin_client.c
+++ b/src/vnet/tcp/builtin_client.c
@@ -410,9 +410,6 @@ builtin_session_connected_callback (u32 app_index, u32 api_context,
return -1;
}
- /* Mark vpp session as connected */
- s->session_state = SESSION_STATE_READY;
-
tm->our_event_queue = session_manager_get_vpp_event_queue (s->thread_index);
tm->vpp_event_queue = session_manager_get_vpp_event_queue (s->thread_index);
@@ -466,6 +463,7 @@ builtin_session_reset_callback (stream_session_t * s)
{
if (s->session_state == SESSION_STATE_READY)
clib_warning ("Reset active connection %U", format_stream_session, s, 2);
+ stream_session_cleanup (s);
return;
}
@@ -478,6 +476,11 @@ builtin_session_create_callback (stream_session_t * s)
static void
builtin_session_disconnect_callback (stream_session_t * s)
{
+ tclient_main_t *tm = &tclient_main;
+ vnet_disconnect_args_t _a, *a = &_a;
+ a->handle = stream_session_handle (s);
+ a->app_index = tm->app_index;
+ vnet_disconnect_session (a);
return;
}
@@ -521,7 +524,7 @@ attach_builtin_test_clients_app (void)
options[SESSION_OPTIONS_ACCEPT_COOKIE] = 0x12345678;
options[SESSION_OPTIONS_SEGMENT_SIZE] = (2ULL << 32);
options[SESSION_OPTIONS_RX_FIFO_SIZE] = tm->fifo_size;
- options[SESSION_OPTIONS_TX_FIFO_SIZE] = tm->fifo_size / 2;
+ options[SESSION_OPTIONS_TX_FIFO_SIZE] = tm->fifo_size;
options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = tm->private_segment_count;
options[APP_OPTIONS_PRIVATE_SEGMENT_SIZE] = tm->private_segment_size;
options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = prealloc_fifos;
diff --git a/src/vnet/tcp/builtin_server.c b/src/vnet/tcp/builtin_server.c
index 4ecaf56a70c..3416678ead6 100644
--- a/src/vnet/tcp/builtin_server.c
+++ b/src/vnet/tcp/builtin_server.c
@@ -213,15 +213,15 @@ builtin_server_rx_callback (stream_session_t * s)
q = bsm->vpp_queue[thread_index];
if (PREDICT_FALSE (q->cursize == q->maxsize))
clib_warning ("out of event queue space");
- else if (unix_shared_memory_queue_add (q, (u8 *) & evt, 0 /* don't wait for mutex */
- ))
+ else if (unix_shared_memory_queue_add (q, (u8 *) & evt, 0))
clib_warning ("failed to enqueue self-tap");
- bsm->rx_retries[thread_index][s->session_index]++;
if (bsm->rx_retries[thread_index][s->session_index] == 500000)
{
clib_warning ("session stuck: %U", format_stream_session, s, 2);
}
+ if (bsm->rx_retries[thread_index][s->session_index] < 500001)
+ bsm->rx_retries[thread_index][s->session_index]++;
}
return 0;
@@ -303,7 +303,7 @@ create_api_loopback (vlib_main_t * vm)
/* Wait for reply */
bsm->node_index = vlib_get_current_process (vm)->node_runtime.node_index;
- vlib_process_wait_for_event_or_clock (vm, 1.0);
+ vlib_process_wait_for_event_or_clock (vm, 2.0);
event_type = vlib_process_get_events (vm, &event_data);
switch (event_type)
{
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 8ed325d2809..a2214158c1b 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -163,6 +163,33 @@ tcp_connection_del (tcp_connection_t * tc)
tcp_connection_cleanup (tc);
}
+/**
+ * Cleanup half-open connection
+ */
+void
+tcp_half_open_connection_del (tcp_connection_t * tc)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ if (CLIB_DEBUG)
+ memset (tc, 0xFA, sizeof (*tc));
+ clib_spinlock_lock (&tm->half_open_lock);
+ pool_put (tm->half_open_connections, tc);
+ clib_spinlock_unlock (&tm->half_open_lock);
+}
+
+tcp_connection_t *
+tcp_connection_new (u8 thread_index)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ tcp_connection_t *tc;
+
+ pool_get (tm->connections[thread_index], tc);
+ memset (tc, 0, sizeof (*tc));
+ tc->c_c_index = tc - tm->connections[thread_index];
+ tc->c_thread_index = thread_index;
+ return tc;
+}
+
/** Notify session that connection has been reset.
*
* Switch state to closed and wait for session to call cleanup.
@@ -170,6 +197,7 @@ tcp_connection_del (tcp_connection_t * tc)
void
tcp_connection_reset (tcp_connection_t * tc)
{
+ TCP_EVT_DBG (TCP_EVT_RST_RCVD, tc);
switch (tc->state)
{
case TCP_STATE_SYN_RCVD:
@@ -178,12 +206,18 @@ tcp_connection_reset (tcp_connection_t * tc)
tcp_connection_cleanup (tc);
break;
case TCP_STATE_SYN_SENT:
+ /* XXX remove sst from call */
+ stream_session_connect_notify (&tc->connection, tc->connection.proto,
+ 1 /* fail */ );
+ tcp_connection_cleanup (tc);
+ break;
case TCP_STATE_ESTABLISHED:
case TCP_STATE_CLOSE_WAIT:
case TCP_STATE_FIN_WAIT_1:
case TCP_STATE_FIN_WAIT_2:
case TCP_STATE_CLOSING:
tc->state = TCP_STATE_CLOSED;
+ TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc);
/* Make sure all timers are cleared */
tcp_connection_timers_reset (tc);
@@ -227,6 +261,7 @@ tcp_connection_close (tcp_connection_t * tc)
tc->state = TCP_STATE_CLOSED;
else if (tc->state == TCP_STATE_CLOSE_WAIT)
tc->state = TCP_STATE_LAST_ACK;
+ TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc);
/* If in CLOSED and WAITCLOSE timer is not set, delete connection now */
if (tc->timers[TCP_TIMER_WAITCLOSE] == TCP_TIMER_HANDLE_INVALID
@@ -250,6 +285,7 @@ tcp_session_cleanup (u32 conn_index, u32 thread_index)
/* Wait for the session tx events to clear */
tc->state = TCP_STATE_CLOSED;
+ TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc);
tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
}
@@ -287,7 +323,7 @@ ip_interface_get_first_ip (u32 sw_if_index, u8 is_ip4)
* Allocate local port and add if successful add entry to local endpoint
* table to mark the pair as used.
*/
-u16
+int
tcp_allocate_local_port (tcp_main_t * tm, ip46_address_t * ip)
{
transport_endpoint_t *tep;
@@ -484,7 +520,7 @@ tcp_connection_open (ip46_address_t * rmt_addr, u16 rmt_port, u8 is_ip4)
fib_node_index_t fei;
u32 sw_if_index;
ip46_address_t lcl_addr;
- u16 lcl_port;
+ int lcl_port;
/*
* Find the local address and allocate port
@@ -500,12 +536,19 @@ tcp_connection_open (ip46_address_t * rmt_addr, u16 rmt_port, u8 is_ip4)
/* Couldn't find route to destination. Bail out. */
if (fei == FIB_NODE_INDEX_INVALID)
- return -1;
+ {
+ clib_warning ("no route to destination");
+ return -1;
+ }
sw_if_index = fib_entry_get_resolving_interface (fei);
if (sw_if_index == (u32) ~ 0)
- return -1;
+ {
+ clib_warning ("no resolving interface for %U", format_ip46_address,
+ rmt_addr, IP46_TYPE_IP4);
+ return -1;
+ }
if (is_ip4)
{
@@ -570,11 +613,9 @@ tcp_connection_open (ip46_address_t * rmt_addr, u16 rmt_port, u8 is_ip4)
/* The other connection vars will be initialized after SYN ACK */
tcp_connection_timers_init (tc);
- tcp_send_syn (tc);
-
- tc->state = TCP_STATE_SYN_SENT;
-
TCP_EVT_DBG (TCP_EVT_OPEN, tc);
+ tc->state = TCP_STATE_SYN_SENT;
+ tcp_send_syn (tc);
return tc->c_c_index;
}
@@ -1206,7 +1247,7 @@ tcp_main_enable (vlib_main_t * vm)
clib_bihash_init_24_8 (&tm->local_endpoints_table, "local endpoint table",
200000 /* $$$$ config parameter nbuckets */ ,
(64 << 20) /*$$$ config parameter table size */ );
-
+ clib_spinlock_init (&tm->half_open_lock);
return error;
}
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index fd0d02b99d6..89c30616365 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -33,6 +33,7 @@
#define TCP_DUPACK_THRESHOLD 3
#define TCP_MAX_RX_FIFO_SIZE 4 << 20
+#define TCP_MIN_RX_FIFO_SIZE 4 << 10
#define TCP_IW_N_SEGMENTS 10
#define TCP_ALWAYS_ACK 1 /**< On/off delayed acks */
#define TCP_USE_SACKS 1 /**< Disable only for testing */
@@ -371,11 +372,9 @@ typedef struct _tcp_main
/* Per worker-thread timer wheel for connections timers */
tw_timer_wheel_16t_2w_512sl_t *timer_wheels;
-// /* Convenience per worker-thread vector of connections to DELACK */
-// u32 **delack_connections;
-
/* Pool of half-open connections on which we've sent a SYN */
tcp_connection_t *half_open_connections;
+ clib_spinlock_t half_open_lock;
/* Pool of local TCP endpoints */
transport_endpoint_t *local_endpoints;
@@ -455,6 +454,8 @@ tcp_get_connection_from_transport (transport_connection_t * tconn)
void tcp_connection_close (tcp_connection_t * tc);
void tcp_connection_cleanup (tcp_connection_t * tc);
void tcp_connection_del (tcp_connection_t * tc);
+void tcp_half_open_connection_del (tcp_connection_t * tc);
+tcp_connection_t *tcp_connection_new (u8 thread_index);
void tcp_connection_reset (tcp_connection_t * tc);
u8 *format_tcp_connection_id (u8 * s, va_list * args);
@@ -472,13 +473,15 @@ tcp_listener_get (u32 tli)
always_inline tcp_connection_t *
tcp_half_open_connection_get (u32 conn_index)
{
+ if (pool_is_free_index (tcp_main.half_open_connections, conn_index))
+ return 0;
return pool_elt_at_index (tcp_main.half_open_connections, conn_index);
}
void tcp_make_ack (tcp_connection_t * ts, vlib_buffer_t * b);
void tcp_make_fin (tcp_connection_t * tc, vlib_buffer_t * b);
void tcp_make_synack (tcp_connection_t * ts, vlib_buffer_t * b);
-void tcp_send_reset (vlib_buffer_t * pkt, u8 is_ip4);
+void tcp_send_reset (tcp_connection_t * tc, vlib_buffer_t * pkt, u8 is_ip4);
void tcp_send_syn (tcp_connection_t * tc);
void tcp_send_fin (tcp_connection_t * tc);
void tcp_init_mss (tcp_connection_t * tc);
@@ -658,7 +661,6 @@ tcp_timer_update (tcp_connection_t * tc, u8 timer_id, u32 interval)
tc->c_c_index, timer_id, interval);
}
-/* XXX Switch retransmit to faster TW */
always_inline void
tcp_retransmit_timer_set (tcp_connection_t * tc)
{
diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h
index be51bca2a26..e3da56f4a43 100755
--- a/src/vnet/tcp/tcp_debug.h
+++ b/src/vnet/tcp/tcp_debug.h
@@ -19,10 +19,9 @@
#include <vlib/vlib.h>
#define TCP_DEBUG (1)
-#define TCP_DEBUG_SM (0)
-#define TCP_DEBUG_CC (1)
-#define TCP_DEBUG_CC_STAT (1)
-#define TCP_DEBUG_SM_VERBOSE (0)
+#define TCP_DEBUG_SM (2)
+#define TCP_DEBUG_CC (0)
+#define TCP_DEBUG_CC_STAT (0)
#define foreach_tcp_dbg_evt \
_(INIT, "") \
@@ -33,7 +32,9 @@
_(UNBIND, "unbind") \
_(DELETE, "delete") \
_(SYN_SENT, "SYN sent") \
- _(SYN_RTX, "SYN retransmit") \
+ _(SYNACK_SENT, "SYNACK sent") \
+ _(SYNACK_RCVD, "SYNACK rcvd") \
+ _(SYN_RXT, "SYN retransmit") \
_(FIN_SENT, "FIN sent") \
_(ACK_SENT, "ACK sent") \
_(DUPACK_SENT, "DUPACK sent") \
@@ -43,6 +44,7 @@
_(DUPACK_RCVD, "DUPACK rcvd") \
_(FIN_RCVD, "FIN rcvd") \
_(RST_RCVD, "RST rcvd") \
+ _(STATE_CHANGE, "state change") \
_(PKTIZE, "packetize") \
_(INPUT, "in") \
_(SND_WND, "snd_wnd update") \
@@ -96,11 +98,64 @@ typedef enum _tcp_dbg_evt
ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, \
_e, _tc->c_elog_track)
-#define TCP_EVT_INIT_HANDLER(_tc, _fmt, ...) \
+#define TCP_DBG_IP_TAG_LCL(_tc) \
{ \
- _tc->c_elog_track.name = \
- (char *) format (0, _fmt, _tc->c_c_index, 0); \
+ if (_tc->c_is_ip4) \
+ { \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "lcl: %d.%d.%d.%d:%d", \
+ .format_args = "i4i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _tc->c_lcl_ip.ip4.as_u8[0]; \
+ ed->data[1] = _tc->c_lcl_ip.ip4.as_u8[1]; \
+ ed->data[2] = _tc->c_lcl_ip.ip4.as_u8[2]; \
+ ed->data[3] = _tc->c_lcl_ip.ip4.as_u8[3]; \
+ ed->data[4] = clib_net_to_host_u16(_tc->c_lcl_port); \
+ } \
+}
+
+#define TCP_DBG_IP_TAG_RMT(_tc) \
+{ \
+ if (_tc->c_is_ip4) \
+ { \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "rmt: %d.%d.%d.%d:%d", \
+ .format_args = "i4i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _tc->c_rmt_ip.ip4.as_u8[0]; \
+ ed->data[1] = _tc->c_rmt_ip.ip4.as_u8[1]; \
+ ed->data[2] = _tc->c_rmt_ip.ip4.as_u8[2]; \
+ ed->data[3] = _tc->c_rmt_ip.ip4.as_u8[3]; \
+ ed->data[4] = clib_net_to_host_u16(_tc->c_rmt_port); \
+ } \
+}
+
+#define TCP_EVT_INIT_HANDLER(_tc, _is_l, ...) \
+{ \
+ char *_fmt = _is_l ? "l[%d].%d:%d%c" : "[%d].%d:%d->.%d:%d%c"; \
+ if (_tc->c_is_ip4) \
+ { \
+ _tc->c_elog_track.name = \
+ (char *) format (0, _fmt, _tc->c_thread_index, \
+ _tc->c_lcl_ip.ip4.as_u8[3], \
+ clib_net_to_host_u16(_tc->c_lcl_port), \
+ _tc->c_rmt_ip.ip4.as_u8[3], \
+ clib_net_to_host_u16(_tc->c_rmt_port), 0); \
+ } \
+ else \
+ _tc->c_elog_track.name = \
+ (char *) format (0, _fmt, _tc->c_thread_index, \
+ _tc->c_lcl_ip.ip6.as_u8[15], \
+ clib_net_to_host_u16(_tc->c_lcl_port), \
+ _tc->c_rmt_ip.ip6.as_u8[15], \
+ clib_net_to_host_u16(_tc->c_rmt_port), 0); \
elog_track_register (&vlib_global_main.elog_main, &_tc->c_elog_track);\
+ TCP_DBG_IP_TAG_LCL(_tc); \
+ TCP_DBG_IP_TAG_RMT(_tc); \
}
#define TCP_EVT_DEALLOC_HANDLER(_tc, ...) \
@@ -110,7 +165,7 @@ typedef enum _tcp_dbg_evt
#define TCP_EVT_OPEN_HANDLER(_tc, ...) \
{ \
- TCP_EVT_INIT_HANDLER(_tc, "s%d%c"); \
+ TCP_EVT_INIT_HANDLER(_tc, 0); \
ELOG_TYPE_DECLARE (_e) = \
{ \
.format = "open: index %d", \
@@ -133,7 +188,7 @@ typedef enum _tcp_dbg_evt
#define TCP_EVT_BIND_HANDLER(_tc, ...) \
{ \
- TCP_EVT_INIT_HANDLER(_tc, "l%d%c"); \
+ TCP_EVT_INIT_HANDLER(_tc, 1); \
ELOG_TYPE_DECLARE (_e) = \
{ \
.format = "bind: listener %d", \
@@ -166,18 +221,6 @@ typedef enum _tcp_dbg_evt
TCP_EVT_DEALLOC_HANDLER(_tc); \
}
-#define TCP_EVT_SYN_RCVD_HANDLER(_tc, ...) \
-{ \
- TCP_EVT_INIT_HANDLER(_tc, "s%d%c"); \
- ELOG_TYPE_DECLARE (_e) = \
- { \
- .format = "SYNrx: irs %u", \
- .format_args = "i4", \
- }; \
- DECLARE_ETD(_tc, _e, 1); \
- ed->data[0] = _tc->irs; \
-}
-
#define CONCAT_HELPER(_a, _b) _a##_b
#define CC(_a, _b) CONCAT_HELPER(_a, _b)
#define TCP_EVT_DBG(_evt, _args...) CC(_evt, _HANDLER)(_args)
@@ -190,63 +233,86 @@ typedef enum _tcp_dbg_evt
*/
#if TCP_DEBUG_SM
-#define TCP_EVT_ACK_SENT_HANDLER(_tc, ...) \
+#define TCP_EVT_STATE_CHANGE_HANDLER(_tc, ...) \
{ \
ELOG_TYPE_DECLARE (_e) = \
{ \
- .format = "ack_tx: acked %u rcv_nxt %u rcv_wnd %u snd_nxt %u snd_wnd %u",\
- .format_args = "i4i4i4i4i4", \
+ .format = "state: %s", \
+ .format_args = "t4", \
+ .n_enum_strings = 11, \
+ .enum_strings = { \
+ "closed", \
+ "listen", \
+ "syn-sent", \
+ "syn-rcvd", \
+ "established", \
+ "close_wait", \
+ "fin-wait-1", \
+ "last-ack", \
+ "closing", \
+ "fin-wait-2", \
+ "time-wait", \
+ }, \
}; \
- DECLARE_ETD(_tc, _e, 5); \
- ed->data[0] = _tc->rcv_nxt - _tc->rcv_las; \
- ed->data[1] = _tc->rcv_nxt - _tc->irs; \
- ed->data[2] = _tc->rcv_wnd; \
- ed->data[3] = _tc->snd_nxt - _tc->iss; \
- ed->data[4] = _tc->snd_wnd; \
+ DECLARE_ETD(_tc, _e, 1); \
+ ed->data[0] = _tc->state; \
}
-#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...) \
+#define TCP_EVT_SYN_RCVD_HANDLER(_tc, ...) \
{ \
+ TCP_EVT_INIT_HANDLER(_tc, 0); \
ELOG_TYPE_DECLARE (_e) = \
{ \
- .format = "dack_tx: rcv_nxt %u rcv_wnd %u snd_nxt %u av_wnd %u snd_wnd %u",\
- .format_args = "i4i4i4i4i4", \
+ .format = "syn-rx: irs %u", \
+ .format_args = "i4", \
}; \
- DECLARE_ETD(_tc, _e, 5); \
- ed->data[0] = _tc->rcv_nxt - _tc->irs; \
- ed->data[1] = _tc->rcv_wnd; \
- ed->data[2] = _tc->snd_nxt - _tc->iss; \
- ed->data[3] = tcp_available_wnd(_tc); \
- ed->data[4] = _tc->snd_wnd; \
+ DECLARE_ETD(_tc, _e, 1); \
+ ed->data[0] = _tc->irs; \
+ TCP_EVT_STATE_CHANGE_HANDLER(_tc); \
}
#define TCP_EVT_SYN_SENT_HANDLER(_tc, ...) \
{ \
ELOG_TYPE_DECLARE (_e) = \
{ \
- .format = "SYNtx: iss %u", \
+ .format = "syn-tx: iss %u", \
.format_args = "i4", \
}; \
DECLARE_ETD(_tc, _e, 1); \
ed->data[0] = _tc->iss; \
+ TCP_EVT_STATE_CHANGE_HANDLER(_tc); \
}
-#define TCP_EVT_SYN_RTX_HANDLER(_tc, ...) \
+#define TCP_EVT_SYNACK_SENT_HANDLER(_tc, ...) \
{ \
ELOG_TYPE_DECLARE (_e) = \
{ \
- .format = "SYNrtx: iss %u", \
- .format_args = "i4", \
+ .format = "synack-tx: iss %u irs %u", \
+ .format_args = "i4i4", \
}; \
- DECLARE_ETD(_tc, _e, 1); \
+ DECLARE_ETD(_tc, _e, 2); \
+ ed->data[0] = _tc->iss; \
+ ed->data[1] = _tc->irs; \
+}
+
+#define TCP_EVT_SYNACK_RCVD_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "synack-rx: iss %u irs %u", \
+ .format_args = "i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 2); \
ed->data[0] = _tc->iss; \
+ ed->data[1] = _tc->irs; \
+ TCP_EVT_STATE_CHANGE_HANDLER(_tc); \
}
#define TCP_EVT_FIN_SENT_HANDLER(_tc, ...) \
{ \
ELOG_TYPE_DECLARE (_e) = \
{ \
- .format = "FINtx: snd_nxt %d rcv_nxt %d", \
+ .format = "fin-tx: snd_nxt %d rcv_nxt %d", \
.format_args = "i4i4", \
}; \
DECLARE_ETD(_tc, _e, 2); \
@@ -258,19 +324,20 @@ typedef enum _tcp_dbg_evt
{ \
ELOG_TYPE_DECLARE (_e) = \
{ \
- .format = "RSTtx: snd_nxt %d rcv_nxt %d", \
+ .format = "rst-tx: snd_nxt %d rcv_nxt %d", \
.format_args = "i4i4", \
}; \
DECLARE_ETD(_tc, _e, 2); \
ed->data[0] = _tc->snd_nxt - _tc->iss; \
ed->data[1] = _tc->rcv_nxt - _tc->irs; \
+ TCP_EVT_STATE_CHANGE_HANDLER(_tc); \
}
#define TCP_EVT_FIN_RCVD_HANDLER(_tc, ...) \
{ \
ELOG_TYPE_DECLARE (_e) = \
{ \
- .format = "FINrx: snd_nxt %d rcv_nxt %d", \
+ .format = "fin-rx: snd_nxt %d rcv_nxt %d", \
.format_args = "i4i4", \
}; \
DECLARE_ETD(_tc, _e, 2); \
@@ -282,7 +349,7 @@ typedef enum _tcp_dbg_evt
{ \
ELOG_TYPE_DECLARE (_e) = \
{ \
- .format = "RSTrx: snd_nxt %d rcv_nxt %d", \
+ .format = "rst-rx: snd_nxt %d rcv_nxt %d", \
.format_args = "i4i4", \
}; \
DECLARE_ETD(_tc, _e, 2); \
@@ -290,6 +357,67 @@ typedef enum _tcp_dbg_evt
ed->data[1] = _tc->rcv_nxt - _tc->irs; \
}
+#define TCP_EVT_SYN_RXT_HANDLER(_tc, _type, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "%s-rxt: iss %u", \
+ .format_args = "t4i4", \
+ .n_enum_strings = 2, \
+ .enum_strings = { \
+ "syn", \
+ "syn-ack", \
+ }, \
+ }; \
+ DECLARE_ETD(_tc, _e, 2); \
+ ed->data[0] = _type; \
+ ed->data[1] = _tc->iss; \
+}
+
+#else
+#define TCP_EVT_SYN_SENT_HANDLER(_tc, ...)
+#define TCP_EVT_SYNACK_SENT_HANDLER(_tc, ...)
+#define TCP_EVT_SYNACK_RCVD_HANDLER(_tc, ...)
+#define TCP_EVT_SYN_RXT_HANDLER(_tc, ...)
+#define TCP_EVT_FIN_SENT_HANDLER(_tc, ...)
+#define TCP_EVT_RST_SENT_HANDLER(_tc, ...)
+#define TCP_EVT_FIN_RCVD_HANDLER(_tc, ...)
+#define TCP_EVT_RST_RCVD_HANDLER(_tc, ...)
+#define TCP_EVT_STATE_CHANGE_HANDLER(_tc, ...)
+#endif
+
+#if TCP_DEBUG_SM > 1
+
+#define TCP_EVT_ACK_SENT_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "ack-tx: acked %u rcv_nxt %u rcv_wnd %u snd_nxt %u snd_wnd %u",\
+ .format_args = "i4i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _tc->rcv_nxt - _tc->rcv_las; \
+ ed->data[1] = _tc->rcv_nxt - _tc->irs; \
+ ed->data[2] = _tc->rcv_wnd; \
+ ed->data[3] = _tc->snd_nxt - _tc->iss; \
+ ed->data[4] = _tc->snd_wnd; \
+}
+
+#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "dack-tx: rcv_nxt %u rcv_wnd %u snd_nxt %u av_wnd %u snd_wnd %u",\
+ .format_args = "i4i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _tc->rcv_nxt - _tc->irs; \
+ ed->data[1] = _tc->rcv_wnd; \
+ ed->data[2] = _tc->snd_nxt - _tc->iss; \
+ ed->data[3] = tcp_available_wnd(_tc); \
+ ed->data[4] = _tc->snd_wnd; \
+}
+
#define TCP_EVT_ACK_RCVD_HANDLER(_tc, ...) \
{ \
ELOG_TYPE_DECLARE (_e) = \
@@ -309,7 +437,7 @@ typedef enum _tcp_dbg_evt
{ \
ELOG_TYPE_DECLARE (_e) = \
{ \
- .format = "dack_rx: snd_una %u cwnd %u snd_wnd %u flight %u rcv_wnd %u",\
+ .format = "dack-rx: snd_una %u cwnd %u snd_wnd %u flight %u rcv_wnd %u",\
.format_args = "i4i4i4i4i4", \
}; \
DECLARE_ETD(_tc, _e, 5); \
@@ -370,7 +498,7 @@ typedef enum _tcp_dbg_evt
} \
ELOG_TYPE_DECLARE (_e) = \
{ \
- .format = "TimerPop: %s (%d)", \
+ .format = "timer-pop: %s (%d)", \
.format_args = "t4i4", \
.n_enum_strings = 7, \
.enum_strings = { \
@@ -391,7 +519,8 @@ typedef enum _tcp_dbg_evt
} \
else \
{ \
- clib_warning ("pop for unexisting connection %d", _tc_index); \
+ clib_warning ("pop %d for unexisting connection %d", _timer_id, \
+ _tc_index); \
} \
}
@@ -414,7 +543,7 @@ typedef enum _tcp_dbg_evt
{ \
ELOG_TYPE_DECLARE (_e) = \
{ \
- .format = "paws fail: seq %u end %u tsval %u tsval_recent %u", \
+ .format = "paws-err: seq %u end %u tsval %u tsval_recent %u", \
.format_args = "i4i4i4i4", \
}; \
DECLARE_ETD(_tc, _e, 4); \
@@ -465,12 +594,6 @@ if (_av > 0) \
#else
#define TCP_EVT_ACK_SENT_HANDLER(_tc, ...)
#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...)
-#define TCP_EVT_SYN_SENT_HANDLER(_tc, ...)
-#define TCP_EVT_SYN_RTX_HANDLER(_tc, ...)
-#define TCP_EVT_FIN_SENT_HANDLER(_tc, ...)
-#define TCP_EVT_RST_SENT_HANDLER(_tc, ...)
-#define TCP_EVT_FIN_RCVD_HANDLER(_tc, ...)
-#define TCP_EVT_RST_RCVD_HANDLER(_tc, ...)
#define TCP_EVT_ACK_RCVD_HANDLER(_tc, ...)
#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...)
#define TCP_EVT_PKTIZE_HANDLER(_tc, ...)
@@ -485,12 +608,12 @@ if (_av > 0) \
/*
* State machine verbose
*/
-#if TCP_DBG_SM_VERBOSE
+#if TCP_DEBUG_SM > 2
#define TCP_EVT_SND_WND_HANDLER(_tc, ...) \
{ \
ELOG_TYPE_DECLARE (_e) = \
{ \
- .format = "snd_wnd update: %u ", \
+ .format = "snd-wnd update: %u ", \
.format_args = "i4", \
}; \
DECLARE_ETD(_tc, _e, 1); \
@@ -617,6 +740,7 @@ if (_tc->c_cc_stat_tstamp + STATS_INTERVAL < tcp_time_now()) \
#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...)
#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...)
#define TCP_EVT_CC_PACK_HANDLER(_tc, ...)
+#define TCP_EVT_CC_STAT_HANDLER(_tc, ...)
#endif
#endif /* SRC_VNET_TCP_TCP_DEBUG_H_ */
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index bc7d9015789..cc5cecdc473 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -349,7 +349,10 @@ tcp_segment_validate (vlib_main_t * vm, tcp_connection_t * tc0,
/* 4th: check the SYN bit */
if (tcp_syn (th0))
{
- tcp_send_reset (b0, tc0->c_is_ip4);
+ /* TODO implement RFC 5961 */
+ tcp_make_ack (tc0, b0);
+ *next0 = tcp_next_output (tc0->c_is_ip4);
+ TCP_EVT_DBG (TCP_EVT_SYN_RCVD, tc0);
return -1;
}
@@ -1246,8 +1249,6 @@ tcp_rcv_ack (tcp_connection_t * tc, vlib_buffer_t * b,
* Looks okay, process feedback
*/
- TCP_EVT_DBG (TCP_EVT_ACK_RCVD, tc);
-
if (tcp_opts_sack_permitted (&tc->rcv_opts))
tcp_rcv_sacks (tc, vnet_buffer (b)->tcp.ack_number);
@@ -1263,6 +1264,8 @@ tcp_rcv_ack (tcp_connection_t * tc, vlib_buffer_t * b,
if (tc->bytes_acked)
tcp_dequeue_acked (tc, vnet_buffer (b)->tcp.ack_number);
+ TCP_EVT_DBG (TCP_EVT_ACK_RCVD, tc);
+
/*
* Check if we have congestion event
*/
@@ -1496,9 +1499,13 @@ tcp_can_delack (tcp_connection_t * tc)
static int
tcp_segment_rcv (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b,
- u16 n_data_bytes, u32 * next0)
+ u32 * next0)
{
- u32 error = 0, n_bytes_to_drop;
+ u32 error = 0, n_bytes_to_drop, n_data_bytes;
+
+ vlib_buffer_advance (b, vnet_buffer (b)->tcp.data_offset);
+ n_data_bytes = vnet_buffer (b)->tcp.data_len;
+ ASSERT (n_data_bytes);
/* Handle out-of-order data */
if (PREDICT_FALSE (vnet_buffer (b)->tcp.seq_number != tc->rcv_nxt))
@@ -1512,7 +1519,12 @@ tcp_segment_rcv (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b,
/* Completely in the past (possible retransmit) */
if (seq_leq (vnet_buffer (b)->tcp.seq_end, tc->rcv_nxt))
- goto done;
+ {
+ /* Ack retransmissions since we may not have any data to send */
+ tcp_make_ack (tc, b);
+ *next0 = tcp_next_output (tc->c_is_ip4);
+ goto done;
+ }
/* Chop off the bytes in the past */
n_bytes_to_drop = tc->rcv_nxt - vnet_buffer (b)->tcp.seq_number;
@@ -1550,12 +1562,6 @@ in_order:
* segments can be enqueued after fifo tail offset changes. */
error = tcp_session_enqueue_data (tc, b, n_data_bytes);
- if (n_data_bytes == 0)
- {
- *next0 = TCP_NEXT_DROP;
- goto done;
- }
-
/* Check if ACK can be delayed */
if (tcp_can_delack (tc))
{
@@ -1680,7 +1686,9 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
th0 = tcp_buffer_hdr (b0);
- is_fin = (th0->flags & TCP_FLAG_FIN) != 0;
+ /* N.B. buffer is rewritten if segment is ooo. Thus, th0 becomes a
+ * dangling reference. */
+ is_fin = tcp_is_fin (th0);
/* SYNs, FINs and data consume sequence numbers */
vnet_buffer (b0)->tcp.seq_end = vnet_buffer (b0)->tcp.seq_number
@@ -1700,29 +1708,23 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
/* 5: check the ACK field */
if (tcp_rcv_ack (tc0, b0, th0, &next0, &error0))
- {
- goto done;
- }
+ goto done;
/* 6: check the URG bit TODO */
/* 7: process the segment text */
-
- vlib_buffer_advance (b0, vnet_buffer (b0)->tcp.data_offset);
- error0 = tcp_segment_rcv (tm, tc0, b0,
- vnet_buffer (b0)->tcp.data_len, &next0);
-
- /* N.B. buffer is rewritten if segment is ooo. Thus, th0 becomes a
- * dangling reference. */
+ if (vnet_buffer (b0)->tcp.data_len)
+ error0 = tcp_segment_rcv (tm, tc0, b0, &next0);
/* 8: check the FIN bit */
- if (is_fin)
+ if (PREDICT_FALSE (is_fin))
{
/* Enter CLOSE-WAIT and notify session. Don't send ACK, instead
* wait for session to call close. To avoid lingering
* in CLOSE-WAIT, set timer (reuse WAITCLOSE). */
tc0->state = TCP_STATE_CLOSE_WAIT;
TCP_EVT_DBG (TCP_EVT_FIN_RCVD, tc0);
+ tc0->rcv_nxt += (vnet_buffer (b0)->tcp.data_len == 0);
stream_session_disconnect_notify (&tc0->connection);
tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
}
@@ -1856,6 +1858,21 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
seq0 = vnet_buffer (b0)->tcp.seq_number;
tcp0 = tcp_buffer_hdr (b0);
+ if (!tc0)
+ {
+ ip4_header_t *ip40 = vlib_buffer_get_current (b0);
+ tcp0 = ip4_next_header (ip40);
+ tc0 =
+ (tcp_connection_t *)
+ stream_session_lookup_transport_wt4 (&ip40->dst_address,
+ &ip40->src_address,
+ tcp0->dst_port,
+ tcp0->src_port,
+ SESSION_TYPE_IP4_TCP,
+ my_thread_index);
+ ASSERT (0);
+ goto drop;
+ }
if (PREDICT_FALSE
(!tcp_ack (tcp0) && !tcp_rst (tcp0) && !tcp_syn (tcp0)))
goto drop;
@@ -1881,8 +1898,7 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (ack0 <= tc0->iss || ack0 > tc0->snd_nxt)
{
if (!tcp_rst (tcp0))
- tcp_send_reset (b0, is_ip4);
-
+ tcp_send_reset (tc0, b0, is_ip4);
goto drop;
}
@@ -1900,11 +1916,7 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
/* If ACK is acceptable, signal client that peer is not
* willing to accept connection and drop connection*/
if (tcp_ack (tcp0))
- {
- stream_session_connect_notify (&tc0->connection, sst,
- 1 /* fail */ );
- tcp_connection_cleanup (tc0);
- }
+ tcp_connection_reset (tc0);
goto drop;
}
@@ -1920,6 +1932,10 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (!tcp_syn (tcp0))
goto drop;
+ /* Parse options */
+ if (tcp_options_parse (tcp0, &tc0->rcv_opts))
+ goto drop;
+
/* Stop connection establishment and retransmit timers */
tcp_timer_reset (tc0, TCP_TIMER_ESTABLISH);
tcp_timer_reset (tc0, TCP_TIMER_RETRANSMIT_SYN);
@@ -1928,19 +1944,11 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
* current thread pool. */
pool_get (tm->connections[my_thread_index], new_tc0);
clib_memcpy (new_tc0, tc0, sizeof (*new_tc0));
-
- new_tc0->c_thread_index = my_thread_index;
new_tc0->c_c_index = new_tc0 - tm->connections[my_thread_index];
-
- /* Cleanup half-open connection XXX lock */
- pool_put (tm->half_open_connections, tc0);
-
+ new_tc0->c_thread_index = my_thread_index;
new_tc0->rcv_nxt = vnet_buffer (b0)->tcp.seq_end;
new_tc0->irs = seq0;
-
- /* Parse options */
- if (tcp_options_parse (tcp0, &new_tc0->rcv_opts))
- goto drop;
+ tcp_half_open_connection_del (tc0);
if (tcp_opts_tstamp (&new_tc0->rcv_opts))
{
@@ -1959,7 +1967,7 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tcp_connection_init_vars (new_tc0);
/* SYN-ACK: See if we can switch to ESTABLISHED state */
- if (tcp_ack (tcp0))
+ if (PREDICT_TRUE (tcp_ack (tcp0)))
{
/* Our SYN is ACKed: we have iss < ack = snd_una */
@@ -1976,7 +1984,7 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
0))
{
tcp_connection_cleanup (new_tc0);
- tcp_send_reset (b0, is_ip4);
+ tcp_send_reset (tc0, b0, is_ip4);
goto drop;
}
@@ -1986,6 +1994,7 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
/* Update rtt with the syn-ack sample */
new_tc0->bytes_acked = 1;
tcp_update_rtt (new_tc0, vnet_buffer (b0)->tcp.ack_number);
+ TCP_EVT_DBG (TCP_EVT_SYNACK_RCVD, new_tc0);
}
/* SYN: Simultaneous open. Change state to SYN-RCVD and send SYN-ACK */
else
@@ -1997,12 +2006,12 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
(&new_tc0->connection, sst, 0))
{
tcp_connection_cleanup (new_tc0);
- tcp_send_reset (b0, is_ip4);
+ tcp_send_reset (tc0, b0, is_ip4);
+ TCP_EVT_DBG (TCP_EVT_RST_SENT, tc0);
goto drop;
}
tc0->rtt_ts = 0;
-
tcp_make_synack (new_tc0, b0);
next0 = tcp_next_output (is_ip4);
@@ -2010,12 +2019,10 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
/* Read data, if any */
- if (vnet_buffer (b0)->tcp.data_len)
+ if (PREDICT_FALSE (vnet_buffer (b0)->tcp.data_len))
{
- vlib_buffer_advance (b0, vnet_buffer (b0)->tcp.data_offset);
- error0 = tcp_segment_rcv (tm, new_tc0, b0,
- vnet_buffer (b0)->tcp.data_len,
- &next0);
+ ASSERT (0);
+ error0 = tcp_segment_rcv (tm, new_tc0, b0, &next0);
if (error0 == TCP_ERROR_PURE_ACK)
error0 = TCP_ERROR_SYN_ACKS_RCVD;
}
@@ -2114,6 +2121,7 @@ VLIB_REGISTER_NODE (tcp6_syn_sent_node) =
/* *INDENT-ON* */
VLIB_NODE_FUNCTION_MULTIARCH (tcp6_syn_sent_node, tcp6_syn_sent_rcv);
+
/**
* Handles reception for all states except LISTEN, SYN-SENT and ESTABLISHED
* as per RFC793 p. 64
@@ -2202,7 +2210,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
*/
if (!tcp_rcv_ack_is_acceptable (tc0, b0))
{
- tcp_send_reset (b0, is_ip4);
+ tcp_send_reset (tc0, b0, is_ip4);
goto drop;
}
@@ -2243,6 +2251,8 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
{
ASSERT (tcp_fin (tcp0));
tc0->state = TCP_STATE_FIN_WAIT_2;
+ TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
+
/* Stop all timers, 2MSL will be set lower */
tcp_connection_timers_reset (tc0);
}
@@ -2269,6 +2279,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
/* XXX test that send queue empty */
tc0->state = TCP_STATE_TIME_WAIT;
+ TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
goto drop;
break;
@@ -2289,6 +2300,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
tc0->state = TCP_STATE_CLOSED;
+ TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
/* Don't delete the connection/session yet. Instead, wait a
* reasonable amount of time until the pipes are cleared. In
@@ -2329,10 +2341,8 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
case TCP_STATE_ESTABLISHED:
case TCP_STATE_FIN_WAIT_1:
case TCP_STATE_FIN_WAIT_2:
- vlib_buffer_advance (b0, vnet_buffer (b0)->tcp.data_offset);
- error0 = tcp_segment_rcv (tm, tc0, b0,
- vnet_buffer (b0)->tcp.data_len,
- &next0);
+ if (vnet_buffer (b0)->tcp.data_len)
+ error0 = tcp_segment_rcv (tm, tc0, b0, &next0);
break;
case TCP_STATE_CLOSE_WAIT:
case TCP_STATE_CLOSING:
@@ -2357,6 +2367,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
next0 = tcp_next_output (tc0->c_is_ip4);
stream_session_disconnect_notify (&tc0->connection);
tc0->state = TCP_STATE_CLOSE_WAIT;
+ TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
break;
case TCP_STATE_CLOSE_WAIT:
case TCP_STATE_CLOSING:
@@ -2367,6 +2378,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tc0->state = TCP_STATE_TIME_WAIT;
tcp_connection_timers_reset (tc0);
tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+ TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
break;
case TCP_STATE_FIN_WAIT_2:
/* Got FIN, send ACK! */
@@ -2375,6 +2387,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
tcp_make_ack (tc0, b0);
next0 = tcp_next_output (is_ip4);
+ TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
break;
case TCP_STATE_TIME_WAIT:
/* Remain in the TIME-WAIT state. Restart the 2 MSL time-wait
@@ -2486,7 +2499,6 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
{
u32 n_left_from, next_index, *from, *to_next;
u32 my_thread_index = vm->thread_index;
- tcp_main_t *tm = vnet_get_tcp_main ();
u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP;
from = vlib_frame_vector_args (from_frame);
@@ -2549,14 +2561,10 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
/* 3. check for a SYN (did that already) */
/* Create child session and send SYN-ACK */
- pool_get (tm->connections[my_thread_index], child0);
- memset (child0, 0, sizeof (*child0));
-
- child0->c_c_index = child0 - tm->connections[my_thread_index];
+ child0 = tcp_connection_new (my_thread_index);
child0->c_lcl_port = lc0->c_lcl_port;
child0->c_rmt_port = th0->src_port;
child0->c_is_ip4 = is_ip4;
- child0->c_thread_index = my_thread_index;
child0->state = TCP_STATE_SYN_RCVD;
if (is_ip4)
@@ -2605,7 +2613,6 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
child0->snd_wl2 = vnet_buffer (b0)->tcp.ack_number;
tcp_connection_init_vars (child0);
-
TCP_EVT_DBG (TCP_EVT_SYN_RCVD, child0);
/* Reuse buffer to make syn-ack and send */
@@ -2722,6 +2729,31 @@ typedef enum _tcp_input_next
#define filter_flags (TCP_FLAG_SYN|TCP_FLAG_ACK|TCP_FLAG_RST|TCP_FLAG_FIN)
+/**
+ * Sanity check for the connection returned by a session lookup.
+ *
+ * @param tc  connection returned by the lookup, may be NULL
+ * @param hdr TCP header of the packet that was looked up
+ * @return 1 if tc is NULL or its ports are consistent with the header,
+ *         0 otherwise
+ *
+ * Called from within ASSERT () in the input node. It never modifies the
+ * connection; on a mismatch it only logs a warning if a half-open
+ * connection with matching ports exists.
+ */
+static u8
+tcp_lookup_is_valid (tcp_connection_t * tc, tcp_header_t * hdr)
+{
+ transport_connection_t *tmp;
+ /* No connection found: nothing to validate */
+ if (!tc)
+ return 1;
+
+ /* Local port must match the packet's destination port. Listeners are not
+ * bound to a remote port, so the remote port is only compared for
+ * non-listening connections. Ports are compared as stored, without any
+ * byte-order conversion. */
+ u8 is_valid = (tc->c_lcl_port == hdr->dst_port
+ && (tc->state == TCP_STATE_LISTEN
+ || tc->c_rmt_port == hdr->src_port));
+
+ if (!is_valid)
+ {
+ /* Debugging aid: report when the half-open table holds an entry whose
+ * ports do match the header. The return value is not affected. */
+ if ((tmp = stream_session_lookup_half_open (&tc->connection)))
+ {
+ if (tmp->lcl_port == hdr->dst_port
+ && tmp->rmt_port == hdr->src_port)
+ {
+ clib_warning ("half-open is valid!");
+ }
+ }
+ }
+ return is_valid;
+}
+
always_inline uword
tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * from_frame, int is_ip4)
@@ -2774,7 +2806,6 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
n_data_bytes0 = clib_net_to_host_u16 (ip40->length)
- n_advance_bytes0;
- /* lookup session */
tc0 =
(tcp_connection_t *)
stream_session_lookup_transport_wt4 (&ip40->dst_address,
@@ -2783,6 +2814,7 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tcp0->src_port,
SESSION_TYPE_IP4_TCP,
my_thread_index);
+ ASSERT (tcp_lookup_is_valid (tc0, tcp0));
}
else
{
@@ -2795,12 +2827,13 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tc0 =
(tcp_connection_t *)
- stream_session_lookup_transport_wt6 (&ip60->src_address,
- &ip60->dst_address,
- tcp0->src_port,
+ stream_session_lookup_transport_wt6 (&ip60->dst_address,
+ &ip60->src_address,
tcp0->dst_port,
+ tcp0->src_port,
SESSION_TYPE_IP6_TCP,
my_thread_index);
+ ASSERT (tcp_lookup_is_valid (tc0, tcp0));
}
/* Length check */
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 35f3eba15e0..5e9ecf114a7 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -75,12 +75,34 @@ tcp_window_compute_scale (u32 available_space)
}
/**
- * TCP's IW as recommended by RFC6928
+ * Update max segment size we're able to process.
+ *
+ * The value is constrained by our interface's MTU and IP options. It is
+ * also what we advertise to our peer.
+ */
+void
+tcp_update_rcv_mss (tcp_connection_t * tc)
+{
+ /* TODO find our iface MTU. Until then tc->mss is set from dummy_mtu,
+ * which is defined outside this hunk (presumably a placeholder value;
+ * confirm against its definition). */
+ tc->mss = dummy_mtu;
+}
+
+/**
+ * TCP's initial window
*/
always_inline u32
tcp_initial_wnd_unscaled (tcp_connection_t * tc)
{
- return TCP_IW_N_SEGMENTS * tc->mss;
+ /* RFC 6928 recommends the value computed in the commented-out code below.
+ * However, at the time our connections are initialized, fifos may not be
+ * allocated. Therefore, advertise the smallest possible unscaled window
+ * size and update it once fifos are assigned to the session.
+ */
+ /*
+ tcp_update_rcv_mss (tc);
+ TCP_IW_N_SEGMENTS * tc->mss;
+ */
+ return TCP_MIN_RX_FIFO_SIZE;
}
/**
@@ -373,19 +395,6 @@ tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts,
}
/**
- * Update max segment size we're able to process.
- *
- * The value is constrained by our interface's MTU and IP options. It is
- * also what we advertise to our peer.
- */
-void
-tcp_update_rcv_mss (tcp_connection_t * tc)
-{
- /* TODO find our iface MTU */
- tc->mss = dummy_mtu;
-}
-
-/**
* Update snd_mss to reflect the effective segment size that we can send
* by taking into account all TCP options, including SACKs
*/
@@ -576,6 +585,7 @@ tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b)
/* Init retransmit timer */
tcp_retransmit_timer_set (tc);
+ TCP_EVT_DBG (TCP_EVT_SYNACK_SENT, tc);
}
always_inline void
@@ -684,7 +694,7 @@ tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b0,
* Send reset without reusing existing buffer
*/
void
-tcp_send_reset (vlib_buffer_t * pkt, u8 is_ip4)
+tcp_send_reset (tcp_connection_t * tc, vlib_buffer_t * pkt, u8 is_ip4)
{
vlib_buffer_t *b;
u32 bi;
@@ -720,7 +730,7 @@ tcp_send_reset (vlib_buffer_t * pkt, u8 is_ip4)
{
flags = TCP_FLAG_RST;
seq = pkt_th->ack_number;
- ack = 0;
+ ack = (tc && tc->state >= TCP_STATE_SYN_RCVD) ? tc->rcv_nxt : 0;
}
else
{
@@ -754,6 +764,7 @@ tcp_send_reset (vlib_buffer_t * pkt, u8 is_ip4)
}
tcp_enqueue_to_ip_lookup (vm, b, bi, is_ip4);
+ TCP_EVT_DBG (TCP_EVT_RST_SENT, tc);
}
void
@@ -839,6 +850,7 @@ tcp_send_syn (tcp_connection_t * tc)
tcp_push_ip_hdr (tm, tc, b);
tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4);
+ TCP_EVT_DBG (TCP_EVT_SYN_SENT, tc);
}
always_inline void
@@ -1148,12 +1160,13 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
-
tcp_push_hdr_i (tc, b, tc->state, 1);
/* Account for the SYN */
tc->snd_nxt += 1;
tc->rtt_ts = 0;
+ TCP_EVT_DBG (TCP_EVT_SYN_RXT, tc,
+ (tc->state == TCP_STATE_SYN_SENT ? 0 : 1));
}
else
{
@@ -1173,8 +1186,6 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
{
ASSERT (tc->state == TCP_STATE_SYN_SENT);
- TCP_EVT_DBG (TCP_EVT_SYN_RTX, tc);
-
/* This goes straight to ipx_lookup */
tcp_push_ip_hdr (tm, tc, b);
tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4);
diff --git a/src/vnet/tcp/tcp_test.c b/src/vnet/tcp/tcp_test.c
index f37ba96dab8..5c40ddf9ceb 100644
--- a/src/vnet/tcp/tcp_test.c
+++ b/src/vnet/tcp/tcp_test.c
@@ -1551,6 +1551,101 @@ tcp_test_fifo (vlib_main_t * vm, unformat_input_t * input)
}
static int
+tcp_test_lookup (vlib_main_t * vm, unformat_input_t * input)
+{
+ /*
+ * Unit test for the session lookup table.
+ *
+ * Builds two sessions/connections on thread 0 that share the local address
+ * and remote port but differ in remote address and local port. Only the
+ * first one (tc1) is ever added to the lookup table, so every lookup for
+ * the second one (tc2) is expected to return null.
+ *
+ * Returns 0 when all checks pass and non-zero on the first failure.
+ *
+ * NOTE(review): the pool elements allocated here are never returned to
+ * their pools and tc1 is left in the lookup table on exit.
+ */
+ session_manager_main_t *smm = &session_manager_main;
+ tcp_main_t *tm = &tcp_main;
+ transport_connection_t _tc1, *tc1 = &_tc1, _tc2, *tc2 = &_tc2, *tconn;
+ tcp_connection_t *tc;
+ stream_session_t *s;
+ u8 cmp = 0;
+
+ /* First session/connection pair; a copy of the transport is kept in tc1 */
+ pool_get (smm->sessions[0], s);
+ memset (s, 0, sizeof (*s));
+ s->session_index = s - smm->sessions[0];
+
+ pool_get (tm->connections[0], tc);
+ memset (tc, 0, sizeof (*tc));
+ tc->connection.c_index = tc - tm->connections[0];
+ tc->connection.s_index = s->session_index;
+ s->connection_index = tc->connection.c_index;
+
+ tc->connection.lcl_ip.ip4.as_u32 = clib_host_to_net_u32 (0x06000101);
+ tc->connection.rmt_ip.ip4.as_u32 = clib_host_to_net_u32 (0x06000103);
+ tc->connection.lcl_port = 35051;
+ tc->connection.rmt_port = 53764;
+ tc->connection.proto = 0;
+ clib_memcpy (tc1, &tc->connection, sizeof (*tc1));
+
+ /* Second session/connection pair; a copy of the transport is kept in tc2 */
+ pool_get (smm->sessions[0], s);
+ memset (s, 0, sizeof (*s));
+ s->session_index = s - smm->sessions[0];
+ pool_get (tm->connections[0], tc);
+ memset (tc, 0, sizeof (*tc));
+ tc->connection.c_index = tc - tm->connections[0];
+ tc->connection.s_index = s->session_index;
+ s->connection_index = tc->connection.c_index;
+
+ tc->connection.lcl_ip.ip4.as_u32 = clib_host_to_net_u32 (0x06000101);
+ tc->connection.rmt_ip.ip4.as_u32 = clib_host_to_net_u32 (0x06000102);
+ tc->connection.lcl_port = 38225;
+ tc->connection.rmt_port = 53764;
+ tc->connection.proto = 0;
+ clib_memcpy (tc2, &tc->connection, sizeof (*tc2));
+
+ /*
+ * Confirm that connection lookup works
+ */
+
+ stream_session_table_add_for_tc (tc1, tc1->s_index);
+ tconn = stream_session_lookup_transport_wt4 (&tc1->lcl_ip.ip4,
+ &tc1->rmt_ip.ip4,
+ tc1->lcl_port, tc1->rmt_port,
+ tc1->proto, 0);
+ TCP_TEST ((tconn != 0), "lookup should find the connection");
+ /* Do not rely on TCP_TEST's failure handling to protect the dereferences
+ * below: bail out explicitly if the lookup failed. */
+ if (tconn == 0)
+ return -1;
+ cmp = (memcmp (&tconn->rmt_ip, &tc1->rmt_ip, sizeof (tc1->rmt_ip)) == 0);
+ TCP_TEST ((cmp), "rmt ip is identical %d", cmp);
+ TCP_TEST ((tconn->lcl_port == tc1->lcl_port),
+ "lcl port is identical %d", tconn->lcl_port == tc1->lcl_port);
+
+ /*
+ * Non-existing connection lookup should not work
+ */
+
+ tconn = stream_session_lookup_transport_wt4 (&tc2->lcl_ip.ip4,
+ &tc2->rmt_ip.ip4,
+ tc2->lcl_port, tc2->rmt_port,
+ tc2->proto, 0);
+ TCP_TEST ((tconn == 0), "lookup result should be null");
+
+ /*
+ * Delete and lookup again
+ */
+ stream_session_table_del_for_tc (tc1);
+ tconn = stream_session_lookup_transport_wt4 (&tc1->lcl_ip.ip4,
+ &tc1->rmt_ip.ip4,
+ tc1->lcl_port, tc1->rmt_port,
+ tc1->proto, 0);
+ TCP_TEST ((tconn == 0), "lookup result should be null");
+ tconn = stream_session_lookup_transport_wt4 (&tc2->lcl_ip.ip4,
+ &tc2->rmt_ip.ip4,
+ tc2->lcl_port, tc2->rmt_port,
+ tc2->proto, 0);
+ TCP_TEST ((tconn == 0), "lookup result should be null");
+
+ /*
+ * Re-add tc1 and confirm that tc2, which was never added, is still
+ * not found
+ */
+ stream_session_table_add_for_tc (tc1, tc1->s_index);
+ tconn = stream_session_lookup_transport_wt4 (&tc2->lcl_ip.ip4,
+ &tc2->rmt_ip.ip4,
+ tc2->lcl_port, tc2->rmt_port,
+ tc2->proto, 0);
+ TCP_TEST ((tconn == 0), "lookup result should be null");
+
+ return 0;
+}
+
+static int
tcp_test_session (vlib_main_t * vm, unformat_input_t * input)
{
int rv = 0;
@@ -1632,6 +1727,10 @@ tcp_test (vlib_main_t * vm,
{
res = tcp_test_session (vm, input);
}
+ else if (unformat (input, "lookup"))
+ {
+ res = tcp_test_lookup (vm, input);
+ }
else
break;
}