diff options
author | Florin Coras <fcoras@cisco.com> | 2019-03-31 17:17:11 -0700 |
---|---|---|
committer | Dave Barach <openvpp@barachs.net> | 2019-04-02 16:48:39 +0000 |
commit | f65074e4df47d05238e051615dbaf5d2bcbaddf2 (patch) | |
tree | 5498cb0d07c0c19cd530ef17f35a5607c1319136 /src/vnet | |
parent | c6c4394dbd4a128d07d0a0a72c4db7d132f4474e (diff) |
tcp: improve rcv process ack processing
- Avoid doing congestion control (cc) in closing states.
- Reset connections closed with unread data
Change-Id: I97d46b0459f03ea5439eeb0f233b6c17d3e06dfd
Signed-off-by: Florin Coras <fcoras@cisco.com>
Diffstat (limited to 'src/vnet')
-rw-r--r-- | src/vnet/session/session.h | 7 | ||||
-rw-r--r-- | src/vnet/session/transport.c | 2 | ||||
-rw-r--r-- | src/vnet/tcp/tcp.c | 9 | ||||
-rw-r--r-- | src/vnet/tcp/tcp.h | 1 | ||||
-rw-r--r-- | src/vnet/tcp/tcp_input.c | 136 | ||||
-rw-r--r-- | src/vnet/tcp/tcp_output.c | 1 |
6 files changed, 87 insertions, 69 deletions
diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index a3b84a6c8ef..ed42e5476e9 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -389,6 +389,13 @@ transport_max_tx_dequeue (transport_connection_t * tc) } always_inline u32 +transport_max_rx_dequeue (transport_connection_t * tc) +{ + session_t *s = session_get (tc->s_index, tc->thread_index); + return svm_fifo_max_dequeue (s->rx_fifo); +} + +always_inline u32 transport_rx_fifo_size (transport_connection_t * tc) { session_t *s = session_get (tc->s_index, tc->thread_index); diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c index abab0865674..d83ecfb133c 100644 --- a/src/vnet/session/transport.c +++ b/src/vnet/session/transport.c @@ -49,7 +49,7 @@ static double transport_pacer_period; #define TRANSPORT_PACER_MIN_MSS 1460 #define TRANSPORT_PACER_MIN_BURST TRANSPORT_PACER_MIN_MSS -#define TRANSPORT_PACER_MAX_BURST (48 * TRANSPORT_PACER_MIN_MSS) +#define TRANSPORT_PACER_MAX_BURST (32 * TRANSPORT_PACER_MIN_MSS) u8 * format_transport_proto (u8 * s, va_list * args) diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index e262513687f..09c47d989ac 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -350,6 +350,15 @@ tcp_connection_close (tcp_connection_t * tc) tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_FINWAIT1_TIME); break; case TCP_STATE_ESTABLISHED: + /* If closing with unread data, reset the connection */ + if (transport_max_rx_dequeue (&tc->connection)) + { + tcp_send_reset (tc); + tcp_connection_timers_reset (tc); + tcp_connection_set_state (tc, TCP_STATE_CLOSED); + tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME); + break; + } if (!transport_max_tx_dequeue (&tc->connection)) tcp_send_fin (tc); else diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 8383d014276..46d72b74d74 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -126,6 +126,7 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler; 
_(FRXT_FIRST, "Fast-retransmit first again") \ _(DEQ_PENDING, "Pending dequeue acked") \ _(PSH_PENDING, "PSH pending") \ + _(FINRCVD, "FIN received") \ typedef enum _tcp_connection_flag_bits { diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index cc630f8ae5f..9ac2d854ba5 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -405,11 +405,26 @@ error: } always_inline int -tcp_rcv_ack_is_acceptable (tcp_connection_t * tc0, vlib_buffer_t * tb0) +tcp_rcv_ack_no_cc (tcp_connection_t * tc, vlib_buffer_t * b, u32 * error) { /* SND.UNA =< SEG.ACK =< SND.NXT */ - return (seq_leq (tc0->snd_una, vnet_buffer (tb0)->tcp.ack_number) - && seq_leq (vnet_buffer (tb0)->tcp.ack_number, tc0->snd_una_max)); + if (!(seq_leq (tc->snd_una, vnet_buffer (b)->tcp.ack_number) + && seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt))) + { + if (seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_una_max)) + { + tc->snd_nxt = vnet_buffer (b)->tcp.ack_number; + goto acceptable; + } + *error = TCP_ERROR_ACK_INVALID; + return -1; + } + +acceptable: + tc->bytes_acked = vnet_buffer (b)->tcp.ack_number - tc->snd_una; + tc->snd_una = vnet_buffer (b)->tcp.ack_number; + *error = TCP_ERROR_ACK_OK; + return 0; } /** @@ -2703,24 +2718,24 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, switch (tc0->state) { case TCP_STATE_SYN_RCVD: + + /* Make sure the segment is exactly right */ + if (tc0->rcv_nxt != vnet_buffer (b0)->tcp.seq_number || is_fin0) + { + tcp_connection_reset (tc0); + error0 = TCP_ERROR_SEGMENT_INVALID; + goto drop; + } + /* * If the segment acknowledgment is not acceptable, form a * reset segment, * <SEQ=SEG.ACK><CTL=RST> * and send it. 
*/ - if (!tcp_rcv_ack_is_acceptable (tc0, b0)) + if (tcp_rcv_ack_no_cc (tc0, b0, &error0)) { tcp_connection_reset (tc0); - error0 = TCP_ERROR_ACK_INVALID; - goto drop; - } - - /* Make sure the ack is exactly right */ - if (tc0->rcv_nxt != vnet_buffer (b0)->tcp.seq_number || is_fin0) - { - tcp_connection_reset (tc0); - error0 = TCP_ERROR_SEGMENT_INVALID; goto drop; } @@ -2774,12 +2789,22 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* If FIN is ACKed */ else if (tc0->snd_una == tc0->snd_nxt) { - tcp_connection_set_state (tc0, TCP_STATE_FIN_WAIT_2); - /* Stop all retransmit timers because we have nothing more - * to send. Enable waitclose though because we're willing to - * wait for peer's FIN but not indefinitely. */ + * to send. */ tcp_connection_timers_reset (tc0); + + /* We already have a FIN but didn't transition to CLOSING + * because of outstanding tx data. Close the connection. */ + if (tc0->flags & TCP_CONN_FINRCVD) + { + tcp_connection_set_state (tc0, TCP_STATE_CLOSED); + tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); + goto drop; + } + + tcp_connection_set_state (tc0, TCP_STATE_FIN_WAIT_2); + /* Enable waitclose because we're willing to wait for peer's + * FIN but not indefinitely. */ tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); /* Don't try to deq the FIN acked */ @@ -2793,7 +2818,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* In addition to the processing for the ESTABLISHED state, if * the retransmission queue is empty, the user's CLOSE can be * acknowledged ("ok") but do not delete the TCB. 
*/ - if (tcp_rcv_ack (wrk, tc0, b0, tcp0, &error0)) + if (tcp_rcv_ack_no_cc (tc0, b0, &error0)) goto drop; tc0->burst_acked = 0; break; @@ -2802,37 +2827,27 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (tcp_rcv_ack (wrk, tc0, b0, tcp0, &error0)) goto drop; - if (tc0->flags & TCP_CONN_FINPNDG) - { - /* TX fifo finally drained */ - if (!transport_max_tx_dequeue (&tc0->connection)) - { - tcp_send_fin (tc0); - tcp_connection_timers_reset (tc0); - tcp_connection_set_state (tc0, TCP_STATE_LAST_ACK); - tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); - } - } + if (!(tc0->flags & TCP_CONN_FINPNDG)) + break; + + /* Still have outstanding tx data */ + if (transport_max_tx_dequeue (&tc0->connection)) + break; + + tcp_send_fin (tc0); + tcp_connection_timers_reset (tc0); + tcp_connection_set_state (tc0, TCP_STATE_LAST_ACK); + tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); break; case TCP_STATE_CLOSING: /* In addition to the processing for the ESTABLISHED state, if * the ACK acknowledges our FIN then enter the TIME-WAIT state, * otherwise ignore the segment. */ - if (!tcp_rcv_ack_is_acceptable (tc0, b0)) - { - error0 = TCP_ERROR_ACK_INVALID; - goto drop; - } + if (tcp_rcv_ack_no_cc (tc0, b0, &error0)) + goto drop; - error0 = TCP_ERROR_ACK_OK; - tc0->snd_una = vnet_buffer (b0)->tcp.ack_number; - /* Ack moved snd_una beyond snd_nxt so reprogram fin */ - if (seq_gt (tc0->snd_una, tc0->snd_nxt)) - { - tc0->snd_nxt = tc0->snd_una; - tc0->flags &= ~TCP_CONN_FINSNT; - goto drop; - } + if (tc0->snd_una != tc0->snd_nxt) + goto drop; tcp_connection_timers_reset (tc0); tcp_connection_set_state (tc0, TCP_STATE_TIME_WAIT); @@ -2845,13 +2860,9 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, * acknowledgment of our FIN. If our FIN is now acknowledged, * delete the TCB, enter the CLOSED state, and return. 
*/ - if (!tcp_rcv_ack_is_acceptable (tc0, b0)) - { - error0 = TCP_ERROR_ACK_INVALID; - goto drop; - } - error0 = TCP_ERROR_ACK_OK; - tc0->snd_una = vnet_buffer (b0)->tcp.ack_number; + if (tcp_rcv_ack_no_cc (tc0, b0, &error0)) + goto drop; + /* Apparently our ACK for the peer's FIN was lost */ if (is_fin0 && tc0->snd_una != tc0->snd_nxt) { @@ -2875,7 +2886,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, * retransmission of the remote FIN. Acknowledge it, and restart * the 2 MSL timeout. */ - if (tcp_rcv_ack (wrk, tc0, b0, tcp0, &error0)) + if (tcp_rcv_ack_no_cc (tc0, b0, &error0)) goto drop; if (!is_fin0) @@ -2943,26 +2954,17 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, break; case TCP_STATE_FIN_WAIT_1: tc0->rcv_nxt += 1; - tcp_connection_set_state (tc0, TCP_STATE_CLOSING); + /* If data is outstanding stay in FIN_WAIT_1 and try to finish + * sending it. */ if (tc0->flags & TCP_CONN_FINPNDG) { - /* Drop all outstanding tx data. */ - session_tx_fifo_dequeue_drop (&tc0->connection, - transport_max_tx_dequeue - (&tc0->connection)); - /* Make it look as if we've recovered, if needed */ - if (tcp_in_cong_recovery (tc0)) - { - scoreboard_clear (&tc0->sack_sb); - tcp_fastrecovery_off (tc0); - tcp_recovery_off (tc0); - tcp_connection_timers_reset (tc0); - tc0->snd_nxt = tc0->snd_una; - } - tcp_send_fin (tc0); + tc0->flags |= TCP_CONN_FINRCVD; } else - tcp_program_ack (wrk, tc0); + { + tcp_connection_set_state (tc0, TCP_STATE_CLOSING); + tcp_program_ack (wrk, tc0); + } /* Wait for ACK for our FIN but not forever */ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); break; diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 518a80de65e..03caa075a41 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -1132,7 +1132,6 @@ tcp_session_push_header (transport_connection_t * tconn, vlib_buffer_t * b) tcp_push_hdr_i (tc, b, tc->snd_nxt, /* compute opts */ 0, /* burst */ 1, /* 
update_snd_nxt */ 1); tc->snd_una_max = seq_max (tc->snd_nxt, tc->snd_una_max); - ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd)); tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una); /* If not tracking an ACK, start tracking */ if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc)) |