aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFlorin Coras <fcoras@cisco.com>2018-11-09 14:34:08 -0800
committerDave Barach <openvpp@barachs.net>2018-11-12 16:29:46 +0000
commitb11175d797c11880d19442f178db5df526ca1256 (patch)
treeca26b8c7d94fe024b104b9a1cd93a22a15764848
parent569a278597bd1af58a4fa9a5f4341cafc99a382b (diff)
tcp: handle disconnects after enq notifications
Make sure that we notify the app of the data enqueued in the burst before notifying of disconnect. Change-Id: I7747a5cbb4c6bc9132007f849c24ce04b7841273 Signed-off-by: Florin Coras <fcoras@cisco.com>
-rw-r--r--src/vnet/tcp/tcp.c2
-rw-r--r--src/vnet/tcp/tcp.h61
-rw-r--r--src/vnet/tcp/tcp_input.c64
-rw-r--r--src/vnet/tcp/tcp_output.c3
4 files changed, 93 insertions, 37 deletions
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 695f614a91c..2511a176e00 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -1332,11 +1332,13 @@ tcp_main_enable (vlib_main_t * vm)
vec_validate (tm->wrk_ctx[thread].postponed_fast_rxt, 255);
vec_validate (tm->wrk_ctx[thread].pending_deq_acked, 255);
vec_validate (tm->wrk_ctx[thread].pending_acks, 255);
+ vec_validate (tm->wrk_ctx[thread].pending_disconnects, 255);
vec_reset_length (tm->wrk_ctx[thread].pending_fast_rxt);
vec_reset_length (tm->wrk_ctx[thread].ongoing_fast_rxt);
vec_reset_length (tm->wrk_ctx[thread].postponed_fast_rxt);
vec_reset_length (tm->wrk_ctx[thread].pending_deq_acked);
vec_reset_length (tm->wrk_ctx[thread].pending_acks);
+ vec_reset_length (tm->wrk_ctx[thread].pending_disconnects);
tm->wrk_ctx[thread].vm = vlib_mains[thread];
/*
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index 46262978a83..d4bebeb47b8 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -118,7 +118,7 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler;
_(SENT_RCV_WND0, "Sent 0 rcv_wnd") \
_(RECOVERY, "Recovery") \
_(FAST_RECOVERY, "Fast Recovery") \
- _(FR_1_SMSS, "Sent 1 SMSS") \
+ _(DCNT_PENDING, "Disconnect pending") \
_(HALF_OPEN_DONE, "Half-open completed") \
_(FINPNDG, "FIN pending") \
_(FRXT_PENDING, "Fast-retransmit pending") \
@@ -352,9 +352,9 @@ struct _tcp_cc_algorithm
#define tcp_in_fastrecovery(tc) ((tc)->flags & TCP_CONN_FAST_RECOVERY)
#define tcp_in_recovery(tc) ((tc)->flags & (TCP_CONN_RECOVERY))
#define tcp_in_slowstart(tc) (tc->cwnd < tc->ssthresh)
-#define tcp_fastrecovery_sent_1_smss(tc) ((tc)->flags & TCP_CONN_FR_1_SMSS)
-#define tcp_fastrecovery_1_smss_on(tc) ((tc)->flags |= TCP_CONN_FR_1_SMSS)
-#define tcp_fastrecovery_1_smss_off(tc) ((tc)->flags &= ~TCP_CONN_FR_1_SMSS)
+#define tcp_disconnect_pending(tc) ((tc)->flags & TCP_CONN_DCNT_PENDING)
+#define tcp_disconnect_pending_on(tc) ((tc)->flags |= TCP_CONN_DCNT_PENDING)
+#define tcp_disconnect_pending_off(tc) ((tc)->flags &= ~TCP_CONN_DCNT_PENDING)
#define tcp_fastrecovery_first(tc) ((tc)->flags & TCP_CONN_FRXT_FIRST)
#define tcp_fastrecovery_first_on(tc) ((tc)->flags |= TCP_CONN_FRXT_FIRST)
#define tcp_fastrecovery_first_off(tc) ((tc)->flags &= ~TCP_CONN_FRXT_FIRST)
@@ -366,7 +366,6 @@ always_inline void
tcp_cong_recovery_off (tcp_connection_t * tc)
{
tc->flags &= ~(TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY);
- tcp_fastrecovery_1_smss_off (tc);
tcp_fastrecovery_first_off (tc);
}
@@ -386,26 +385,46 @@ typedef struct _tcp_lookup_dispatch
typedef struct tcp_worker_ctx_
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- u32 time_now; /**< worker time */
- tw_timer_wheel_16t_2w_512sl_t timer_wheel; /**< worker timer wheel */
- u32 *tx_buffers; /**< tx buffer free list */
- vlib_frame_t *tx_frames[2]; /**< tx frames for tcp 4/6
- output nodes */
- vlib_frame_t *ip_lookup_tx_frames[2]; /**< tx frames for ip 4/6
- lookup nodes */
- u32 *pending_fast_rxt; /**< vector of connections
- needing fast rxt */
- u32 *ongoing_fast_rxt; /**< vector of connections
- now doing fast rxt */
- u32 *postponed_fast_rxt; /**< vector of connections
- that will do fast rxt */
+ /** worker time */
+ u32 time_now;
+
+ /** worker timer wheel */
+ tw_timer_wheel_16t_2w_512sl_t timer_wheel;
+
+ /** tx buffer free list */
+ u32 *tx_buffers;
+
+ /** tx frames for tcp 4/6 output nodes */
+ vlib_frame_t *tx_frames[2];
+
+ /** tx frames for ip 4/6 lookup nodes */
+ vlib_frame_t *ip_lookup_tx_frames[2];
+
+ /** vector of connections needing fast rxt */
+ u32 *pending_fast_rxt;
+
+ /** vector of connections now doing fast rxt */
+ u32 *ongoing_fast_rxt;
+
+ /** vector of connections that will do fast rxt */
+ u32 *postponed_fast_rxt;
+
+ /** vector of pending ack dequeues */
u32 *pending_deq_acked;
+
+ /** vector of pending acks */
u32 *pending_acks;
- vlib_main_t *vm; /**< pointer to vm */
+
+ /** vector of pending disconnect notifications */
+ u32 *pending_disconnects;
+
+ /** convenience pointer to this thread's vlib main */
+ vlib_main_t *vm;
CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
- u8 cached_opts[40]; /**< cached 'on the wire'
- options for bursts */
+
+ /** cached 'on the wire' options for bursts */
+ u8 cached_opts[40];
} tcp_worker_ctx_t;
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index 2578b7d1051..0c8706567ae 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -1131,7 +1131,6 @@ tcp_cc_fastrecovery_exit (tcp_connection_t * tc)
tc->rtt_ts = 0;
tcp_fastrecovery_off (tc);
- tcp_fastrecovery_1_smss_off (tc);
tcp_fastrecovery_first_off (tc);
TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
@@ -1600,6 +1599,54 @@ process_ack:
return 0;
}
+static void
+tcp_program_disconnect (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
+{
+ if (!tcp_disconnect_pending (tc))
+ {
+ vec_add1 (wrk->pending_disconnects, tc->c_c_index);
+ tcp_disconnect_pending_on (tc);
+ }
+}
+
+static void
+tcp_handle_disconnects (tcp_worker_ctx_t * wrk)
+{
+ u32 thread_index, *pending_disconnects;
+ tcp_connection_t *tc;
+ int i;
+
+ if (!vec_len (wrk->pending_disconnects))
+ return;
+
+ thread_index = wrk->vm->thread_index;
+ pending_disconnects = wrk->pending_disconnects;
+ for (i = 0; i < vec_len (pending_disconnects); i++)
+ {
+ tc = tcp_connection_get (pending_disconnects[i], thread_index);
+ tcp_disconnect_pending_off (tc);
+ stream_session_disconnect_notify (&tc->connection);
+ }
+ _vec_len (wrk->pending_disconnects) = 0;
+}
+
+static void
+tcp_rcv_fin (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_t * b,
+ u32 * error)
+{
+ /* Enter CLOSE-WAIT and notify session. To avoid lingering
+ * in CLOSE-WAIT, set timer (reuse WAITCLOSE). */
+ /* Account for the FIN if nothing else was received */
+ if (vnet_buffer (b)->tcp.data_len == 0)
+ tc->rcv_nxt += 1;
+ tcp_program_ack (wrk, tc);
+ tc->state = TCP_STATE_CLOSE_WAIT;
+ tcp_program_disconnect (wrk, tc);
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+ TCP_EVT_DBG (TCP_EVT_FIN_RCVD, tc);
+ *error = TCP_ERROR_FIN_RCVD;
+}
+
static u8
tcp_sack_vector_is_sane (sack_block_t * sacks)
{
@@ -2099,19 +2146,7 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
/* 8: check the FIN bit */
if (PREDICT_FALSE (is_fin))
- {
- /* Enter CLOSE-WAIT and notify session. To avoid lingering
- * in CLOSE-WAIT, set timer (reuse WAITCLOSE). */
- /* Account for the FIN if nothing else was received */
- if (vnet_buffer (b0)->tcp.data_len == 0)
- tc0->rcv_nxt += 1;
- tcp_program_ack (wrk, tc0);
- tc0->state = TCP_STATE_CLOSE_WAIT;
- stream_session_disconnect_notify (&tc0->connection);
- tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
- TCP_EVT_DBG (TCP_EVT_FIN_RCVD, tc0);
- error0 = TCP_ERROR_FIN_RCVD;
- }
+ tcp_rcv_fin (wrk, tc0, b0, &error0);
done:
tcp_inc_err_counter (err_counters, error0, 1);
@@ -2122,6 +2157,7 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
err_counters[TCP_ERROR_EVENT_FIFO_FULL] = errors;
tcp_store_err_counters (established, err_counters);
tcp_handle_postponed_dequeues (wrk);
+ tcp_handle_disconnects (wrk);
vlib_buffer_free (vm, first_buffer, frame->n_vectors);
return frame->n_vectors;
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index e16095b635d..49156368574 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -1201,8 +1201,7 @@ tcp_push_header (tcp_connection_t * tc, vlib_buffer_t * b)
tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED, /* compute opts */ 0,
/* burst */ 1);
tc->snd_una_max = tc->snd_nxt;
- ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd
- + tcp_fastrecovery_sent_1_smss (tc) * tc->snd_mss));
+ ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd));
tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
/* If not tracking an ACK, start tracking */
if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))