From 25579b4acd449e1bae30d2a20a44b77741c8e1fd Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Wed, 6 Jun 2018 17:55:02 -0700 Subject: tcp: cleanup connection/session fixes - Cleanup session state after last ack and avoid using a cleanup timer. - Change session cleanup to free the session as opposed to waiting for delete notify. - When in close-wait, postpone sending the fin on close until all outstanding data has been sent. - Don't flush rx fifo unless in closed state Change-Id: Ic2a4f0d5568b65c83f4b55b6c469a7b24b947f39 Signed-off-by: Florin Coras --- src/svm/svm_fifo.c | 7 +++++++ src/svm/svm_fifo.h | 1 + src/vnet/sctp/sctp_output.c | 2 +- src/vnet/session-apps/proxy.c | 4 ++-- src/vnet/session/session.c | 46 ++++++++++++++++++++++++++--------------- src/vnet/session/session.h | 2 +- src/vnet/session/session_node.c | 9 ++++---- src/vnet/tcp/tcp.c | 29 ++++++++++++++------------ src/vnet/tcp/tcp.h | 2 +- src/vnet/tcp/tcp_input.c | 24 ++++++++++++++------- src/vnet/tcp/tcp_output.c | 5 +++-- 11 files changed, 81 insertions(+), 50 deletions(-) diff --git a/src/svm/svm_fifo.c b/src/svm/svm_fifo.c index 10c319236ee..47df22547ac 100644 --- a/src/svm/svm_fifo.c +++ b/src/svm/svm_fifo.c @@ -828,6 +828,13 @@ svm_fifo_dequeue_drop (svm_fifo_t * f, u32 max_bytes) return total_drop_bytes; } +void +svm_fifo_dequeue_drop_all (svm_fifo_t * f) +{ + f->head = f->tail; + __sync_fetch_and_sub (&f->cursize, f->cursize); +} + u32 svm_fifo_number_ooo_segments (svm_fifo_t * f) { diff --git a/src/svm/svm_fifo.h b/src/svm/svm_fifo.h index 39cdcc06a0c..90a49c03fc1 100644 --- a/src/svm/svm_fifo.h +++ b/src/svm/svm_fifo.h @@ -152,6 +152,7 @@ int svm_fifo_dequeue_nowait (svm_fifo_t * f, u32 max_bytes, u8 * copy_here); int svm_fifo_peek (svm_fifo_t * f, u32 offset, u32 max_bytes, u8 * copy_here); int svm_fifo_dequeue_drop (svm_fifo_t * f, u32 max_bytes); +void svm_fifo_dequeue_drop_all (svm_fifo_t * f); u32 svm_fifo_number_ooo_segments (svm_fifo_t * f); ooo_segment_t *svm_fifo_first_ooo_segment (svm_fifo_t * f); void svm_fifo_init_pointers (svm_fifo_t * f, u32 pointer); diff --git a/src/vnet/sctp/sctp_output.c b/src/vnet/sctp/sctp_output.c index 3c83f68dc29..0c865caa33d 100644 --- a/src/vnet/sctp/sctp_output.c +++ b/src/vnet/sctp/sctp_output.c @@ -1478,7 +1478,7 @@ sctp_prepare_data_retransmit (sctp_connection_t * sctp_conn, * Make sure we can retransmit something */ available_bytes = - stream_session_tx_fifo_max_dequeue (&sctp_conn->sub_conn[idx].connection); + session_tx_fifo_max_dequeue (&sctp_conn->sub_conn[idx].connection); ASSERT (available_bytes >= offset); available_bytes -= offset; if (!available_bytes) diff --git a/src/vnet/session-apps/proxy.c b/src/vnet/session-apps/proxy.c index 596b0374b95..78aa0de2840 100644 --- a/src/vnet/session-apps/proxy.c +++ b/src/vnet/session-apps/proxy.c @@ -423,7 +423,7 @@ proxy_server_attach () a->options[APP_OPTIONS_TX_FIFO_SIZE] = pm->fifo_size; a->options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = pm->private_segment_count; a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = - pm->prealloc_fifos ? pm->prealloc_fifos : 1; + pm->prealloc_fifos ? pm->prealloc_fifos : 0; a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN; @@ -456,7 +456,7 @@ active_open_attach (void) options[APP_OPTIONS_TX_FIFO_SIZE] = pm->fifo_size; options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = pm->private_segment_count; options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = - pm->prealloc_fifos ? pm->prealloc_fifos : 1; + pm->prealloc_fifos ? pm->prealloc_fifos : 0; options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN | APP_OPTIONS_FLAGS_IS_PROXY; diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index cfba31ec406..6559e318ca8 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -414,7 +414,7 @@ stream_session_no_space (transport_connection_t * tc, u32 thread_index, } u32 -stream_session_tx_fifo_max_dequeue (transport_connection_t * tc) +session_tx_fifo_max_dequeue (transport_connection_t * tc) { stream_session_t *s = session_get (tc->s_index, tc->thread_index); if (!s->server_tx_fifo) @@ -452,12 +452,10 @@ session_enqueue_notify (stream_session_t * s, u8 block) session_fifo_event_t evt; svm_queue_t *q; - if (PREDICT_FALSE (s->session_state >= SESSION_STATE_CLOSING)) + if (PREDICT_FALSE (s->session_state == SESSION_STATE_CLOSED)) { /* Session is closed so app will never clean up. Flush rx fifo */ - u32 to_dequeue = svm_fifo_max_dequeue (s->server_rx_fifo); - if (to_dequeue) - svm_fifo_dequeue_drop (s->server_rx_fifo, to_dequeue); + svm_fifo_dequeue_drop_all (s->server_rx_fifo); return 0; } @@ -727,10 +725,13 @@ stream_session_disconnect_notify (transport_connection_t * tc) s = session_get (tc->s_index, tc->thread_index); server = application_get (s->app_index); server->cb_fns.session_disconnect_callback (s); + s->session_state = SESSION_STATE_CLOSING; } /** * Cleans up session and lookup table. + * + * Transport connection must still be valid. */ void stream_session_delete (stream_session_t * s) @@ -1065,13 +1066,23 @@ stream_session_disconnect (stream_session_t * s) session_manager_main_t *smm = &session_manager_main; session_fifo_event_t *evt; - if (!s || s->session_state >= SESSION_STATE_CLOSING) + if (!s) return; + + if (s->session_state >= SESSION_STATE_CLOSING) + { + /* Session already closed. Clear the tx fifo */ + if (s->session_state == SESSION_STATE_CLOSED) + svm_fifo_dequeue_drop_all (s->server_tx_fifo); + return; + } + s->session_state = SESSION_STATE_CLOSING; /* If we are in the handler thread, or being called with the worker barrier - * held (api/cli), just append a new event pending disconnects vector. */ - if (thread_index > 0 || !vlib_get_current_process (vlib_get_main ())) + * held (api/cli), just append a new event to pending disconnects vector. */ + if ((thread_index == 0 && !vlib_get_current_process (vlib_get_main ())) + || thread_index == s->thread_index) { ASSERT (s->thread_index == thread_index || thread_index == 0); vec_add2 (smm->pending_disconnects[s->thread_index], evt, 1); @@ -1103,23 +1114,24 @@ stream_session_disconnect_transport (stream_session_t * s) /** * Cleanup transport and session state. * - * Notify transport of the cleanup, wait for a delete notify to actually - * remove the session state. + * Notify transport of the cleanup and free the session. This should + * be called only if transport reported some error and is already + * closed. */ void stream_session_cleanup (stream_session_t * s) { - int rv; - s->session_state = SESSION_STATE_CLOSED; - /* Delete from the main lookup table to avoid more enqueues */ - rv = session_lookup_del_session (s); - if (rv) - clib_warning ("hash delete error, rv %d", rv); - + /* Delete from main lookup table before we axe the the transport */ + session_lookup_del_session (s); tp_vfts[session_get_transport_proto (s)].cleanup (s->connection_index, s->thread_index); + /* Since we called cleanup, no delete notification will come. So, make + * sure the session is properly freed. */ + segment_manager_dealloc_fifos (s->svm_segment_index, s->server_rx_fifo, + s->server_tx_fifo); + session_free (s); } transport_service_type_t diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index 6908a568a0f..b57053c7f59 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -499,7 +499,7 @@ session_clone_safe (u32 session_index, u32 thread_index) transport_connection_t *session_get_transport (stream_session_t * s); -u32 stream_session_tx_fifo_max_dequeue (transport_connection_t * tc); +u32 session_tx_fifo_max_dequeue (transport_connection_t * tc); int session_enqueue_stream_connection (transport_connection_t * tc, diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c index d6fcd911b08..2eea30be439 100644 --- a/src/vnet/session/session_node.c +++ b/src/vnet/session/session_node.c @@ -799,7 +799,8 @@ skip_dequeue: } break; case FIFO_EVENT_DISCONNECT: - /* Make sure stream disconnects run after the pending list is drained */ + /* Make sure stream disconnects run after the pending list is + * drained */ s0 = session_get_from_handle (e0->session_handle); if (!e0->postponed) { @@ -807,11 +808,9 @@ skip_dequeue: vec_add1 (smm->pending_disconnects[thread_index], *e0); continue; } - /* If tx queue is still not empty, wait a bit */ - if (svm_fifo_max_dequeue (s0->server_tx_fifo) - && e0->postponed < 200) + /* If tx queue is still not empty, wait */ + if (svm_fifo_max_dequeue (s0->server_tx_fifo)) { - e0->postponed += 1; vec_add1 (smm->pending_disconnects[thread_index], *e0); continue; } diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 15ac7d37edc..2a696f19d22 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -277,19 +277,19 @@ tcp_connection_reset (tcp_connection_t * tc) tcp_connection_cleanup (tc); break; case TCP_STATE_ESTABLISHED: + tcp_connection_timers_reset (tc); + /* Set the cleanup timer, in case the session layer/app don't + * cleanly close the connection */ + tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); stream_session_reset_notify (&tc->connection); - /* fall through */ + break; case TCP_STATE_CLOSE_WAIT: case TCP_STATE_FIN_WAIT_1: case TCP_STATE_FIN_WAIT_2: case TCP_STATE_CLOSING: tc->state = TCP_STATE_CLOSED; TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc); - - /* Make sure all timers are cleared */ tcp_connection_timers_reset (tc); - - /* Wait for cleanup from session layer but not forever */ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); break; case TCP_STATE_CLOSED: @@ -326,17 +326,22 @@ tcp_connection_close (tcp_connection_t * tc) tc->state = TCP_STATE_FIN_WAIT_1; break; case TCP_STATE_ESTABLISHED: - if (!stream_session_tx_fifo_max_dequeue (&tc->connection)) + if (!session_tx_fifo_max_dequeue (&tc->connection)) tcp_send_fin (tc); else tc->flags |= TCP_CONN_FINPNDG; tc->state = TCP_STATE_FIN_WAIT_1; break; case TCP_STATE_CLOSE_WAIT: - tcp_send_fin (tc); - tcp_connection_timers_reset (tc); - tc->state = TCP_STATE_LAST_ACK; - tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); + if (!session_tx_fifo_max_dequeue (&tc->connection)) + { + tcp_send_fin (tc); + tcp_connection_timers_reset (tc); + tc->state = TCP_STATE_LAST_ACK; + tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); + } + else + tc->flags |= TCP_CONN_FINPNDG; break; case TCP_STATE_FIN_WAIT_1: tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); @@ -367,11 +372,9 @@ tcp_session_cleanup (u32 conn_index, u32 thread_index) tcp_connection_t *tc; tc = tcp_connection_get (conn_index, thread_index); tcp_connection_timers_reset (tc); - - /* Wait for the session tx events to clear */ tc->state = TCP_STATE_CLOSED; TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc); - tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); + tcp_connection_cleanup (tc); } /** diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 10aa721a4eb..3a31234876e 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -787,7 +787,7 @@ tcp_timer_is_active (tcp_connection_t * tc, tcp_timers_e timer) #define tcp_validate_txf_size(_tc, _a) \ ASSERT(_tc->state != TCP_STATE_ESTABLISHED \ - || stream_session_tx_fifo_max_dequeue (&_tc->connection) >= _a) + || session_tx_fifo_max_dequeue (&_tc->connection) >= _a) void scoreboard_remove_hole (sack_scoreboard_t * sb, diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 04612f885f2..f77d4845da2 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -2474,7 +2474,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (tc0->flags & TCP_CONN_FINPNDG) { /* TX fifo finally drained */ - if (!stream_session_tx_fifo_max_dequeue (&tc0->connection)) + if (!session_tx_fifo_max_dequeue (&tc0->connection)) tcp_send_fin (tc0); } /* If FIN is ACKed */ @@ -2507,6 +2507,18 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tcp_maybe_inc_counter (rcv_process, error0, 1); goto drop; } + if (tc0->flags & TCP_CONN_FINPNDG) + { + /* TX fifo finally drained */ + if (!session_tx_fifo_max_dequeue (&tc0->connection)) + { + tcp_send_fin (tc0); + tcp_connection_timers_reset (tc0); + tc0->state = TCP_STATE_LAST_ACK; + tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, + TCP_2MSL_TIME); + } + } break; case TCP_STATE_CLOSING: /* In addition to the processing for the ESTABLISHED state, if @@ -2545,13 +2557,9 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tc0->state = TCP_STATE_CLOSED; TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0); - tcp_connection_timers_reset (tc0); - - /* Don't delete the connection/session yet. Instead, wait a - * reasonable amount of time until the pipes are cleared. In - * particular, this makes sure that we won't have dead sessions - * when processing events on the tx path */ - tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); + /* Delete the connection/session since the pipes should be + * clear by now */ + tcp_connection_del (tc0); goto drop; diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 91c0e90bb35..8a88bf80b13 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -390,6 +390,7 @@ tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts, case TCP_STATE_ESTABLISHED: case TCP_STATE_FIN_WAIT_1: case TCP_STATE_CLOSED: + case TCP_STATE_CLOSE_WAIT: return tcp_make_established_options (tc, opts); case TCP_STATE_SYN_RCVD: return tcp_make_synack_options (tc, opts); @@ -1213,7 +1214,7 @@ tcp_prepare_retransmit_segment (tcp_connection_t * tc, u32 offset, /* * Make sure we can retransmit something */ - available_bytes = stream_session_tx_fifo_max_dequeue (&tc->connection); + available_bytes = session_tx_fifo_max_dequeue (&tc->connection); ASSERT (available_bytes >= offset); available_bytes -= offset; if (!available_bytes) @@ -1554,7 +1555,7 @@ tcp_timer_persist_handler (u32 index) || tc->snd_wnd > tc->snd_mss || tcp_in_recovery (tc)) return; - available_bytes = stream_session_tx_fifo_max_dequeue (&tc->connection); + available_bytes = session_tx_fifo_max_dequeue (&tc->connection); offset = tc->snd_una_max - tc->snd_una; /* Reprogram persist if no new bytes available to send. We may have data -- cgit 1.2.3-korg