aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Florin Coras <fcoras@cisco.com> 2019-03-31 17:17:11 -0700
committer Dave Barach <openvpp@barachs.net> 2019-04-02 16:48:39 +0000
commit f65074e4df47d05238e051615dbaf5d2bcbaddf2 (patch)
tree 5498cb0d07c0c19cd530ef17f35a5607c1319136
parent c6c4394dbd4a128d07d0a0a72c4db7d132f4474e (diff)
tcp: improve rcv process ack processing
- Avoid doing cc in closing states. - Reset connections closed with unread data Change-Id: I97d46b0459f03ea5439eeb0f233b6c17d3e06dfd Signed-off-by: Florin Coras <fcoras@cisco.com>
-rw-r--r--src/vnet/session/session.h7
-rw-r--r--src/vnet/session/transport.c2
-rw-r--r--src/vnet/tcp/tcp.c9
-rw-r--r--src/vnet/tcp/tcp.h1
-rw-r--r--src/vnet/tcp/tcp_input.c136
-rw-r--r--src/vnet/tcp/tcp_output.c1
6 files changed, 87 insertions, 69 deletions
diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h
index a3b84a6c8ef..ed42e5476e9 100644
--- a/src/vnet/session/session.h
+++ b/src/vnet/session/session.h
@@ -389,6 +389,13 @@ transport_max_tx_dequeue (transport_connection_t * tc)
}
always_inline u32
+transport_max_rx_dequeue (transport_connection_t * tc)
+{
+ session_t *s = session_get (tc->s_index, tc->thread_index);
+ return svm_fifo_max_dequeue (s->rx_fifo);
+}
+
+always_inline u32
transport_rx_fifo_size (transport_connection_t * tc)
{
session_t *s = session_get (tc->s_index, tc->thread_index);
diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c
index abab0865674..d83ecfb133c 100644
--- a/src/vnet/session/transport.c
+++ b/src/vnet/session/transport.c
@@ -49,7 +49,7 @@ static double transport_pacer_period;
#define TRANSPORT_PACER_MIN_MSS 1460
#define TRANSPORT_PACER_MIN_BURST TRANSPORT_PACER_MIN_MSS
-#define TRANSPORT_PACER_MAX_BURST (48 * TRANSPORT_PACER_MIN_MSS)
+#define TRANSPORT_PACER_MAX_BURST (32 * TRANSPORT_PACER_MIN_MSS)
u8 *
format_transport_proto (u8 * s, va_list * args)
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index e262513687f..09c47d989ac 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -350,6 +350,15 @@ tcp_connection_close (tcp_connection_t * tc)
tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_FINWAIT1_TIME);
break;
case TCP_STATE_ESTABLISHED:
+ /* If closing with unread data, reset the connection */
+ if (transport_max_rx_dequeue (&tc->connection))
+ {
+ tcp_send_reset (tc);
+ tcp_connection_timers_reset (tc);
+ tcp_connection_set_state (tc, TCP_STATE_CLOSED);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+ break;
+ }
if (!transport_max_tx_dequeue (&tc->connection))
tcp_send_fin (tc);
else
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index 8383d014276..46d72b74d74 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -126,6 +126,7 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler;
_(FRXT_FIRST, "Fast-retransmit first again") \
_(DEQ_PENDING, "Pending dequeue acked") \
_(PSH_PENDING, "PSH pending") \
+ _(FINRCVD, "FIN received") \
typedef enum _tcp_connection_flag_bits
{
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index cc630f8ae5f..9ac2d854ba5 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -405,11 +405,26 @@ error:
}
always_inline int
-tcp_rcv_ack_is_acceptable (tcp_connection_t * tc0, vlib_buffer_t * tb0)
+tcp_rcv_ack_no_cc (tcp_connection_t * tc, vlib_buffer_t * b, u32 * error)
{
/* SND.UNA =< SEG.ACK =< SND.NXT */
- return (seq_leq (tc0->snd_una, vnet_buffer (tb0)->tcp.ack_number)
- && seq_leq (vnet_buffer (tb0)->tcp.ack_number, tc0->snd_una_max));
+ if (!(seq_leq (tc->snd_una, vnet_buffer (b)->tcp.ack_number)
+ && seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)))
+ {
+ if (seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_una_max))
+ {
+ tc->snd_nxt = vnet_buffer (b)->tcp.ack_number;
+ goto acceptable;
+ }
+ *error = TCP_ERROR_ACK_INVALID;
+ return -1;
+ }
+
+acceptable:
+ tc->bytes_acked = vnet_buffer (b)->tcp.ack_number - tc->snd_una;
+ tc->snd_una = vnet_buffer (b)->tcp.ack_number;
+ *error = TCP_ERROR_ACK_OK;
+ return 0;
}
/**
@@ -2703,24 +2718,24 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
switch (tc0->state)
{
case TCP_STATE_SYN_RCVD:
+
+ /* Make sure the segment is exactly right */
+ if (tc0->rcv_nxt != vnet_buffer (b0)->tcp.seq_number || is_fin0)
+ {
+ tcp_connection_reset (tc0);
+ error0 = TCP_ERROR_SEGMENT_INVALID;
+ goto drop;
+ }
+
/*
* If the segment acknowledgment is not acceptable, form a
* reset segment,
* <SEQ=SEG.ACK><CTL=RST>
* and send it.
*/
- if (!tcp_rcv_ack_is_acceptable (tc0, b0))
+ if (tcp_rcv_ack_no_cc (tc0, b0, &error0))
{
tcp_connection_reset (tc0);
- error0 = TCP_ERROR_ACK_INVALID;
- goto drop;
- }
-
- /* Make sure the ack is exactly right */
- if (tc0->rcv_nxt != vnet_buffer (b0)->tcp.seq_number || is_fin0)
- {
- tcp_connection_reset (tc0);
- error0 = TCP_ERROR_SEGMENT_INVALID;
goto drop;
}
@@ -2774,12 +2789,22 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
/* If FIN is ACKed */
else if (tc0->snd_una == tc0->snd_nxt)
{
- tcp_connection_set_state (tc0, TCP_STATE_FIN_WAIT_2);
-
/* Stop all retransmit timers because we have nothing more
- * to send. Enable waitclose though because we're willing to
- * wait for peer's FIN but not indefinitely. */
+ * to send. */
tcp_connection_timers_reset (tc0);
+
+ /* We already have a FIN but didn't transition to CLOSING
+ * because of outstanding tx data. Close the connection. */
+ if (tc0->flags & TCP_CONN_FINRCVD)
+ {
+ tcp_connection_set_state (tc0, TCP_STATE_CLOSED);
+ tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+ goto drop;
+ }
+
+ tcp_connection_set_state (tc0, TCP_STATE_FIN_WAIT_2);
+ /* Enable waitclose because we're willing to wait for peer's
+ * FIN but not indefinitely. */
tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
/* Don't try to deq the FIN acked */
@@ -2793,7 +2818,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
/* In addition to the processing for the ESTABLISHED state, if
* the retransmission queue is empty, the user's CLOSE can be
* acknowledged ("ok") but do not delete the TCB. */
- if (tcp_rcv_ack (wrk, tc0, b0, tcp0, &error0))
+ if (tcp_rcv_ack_no_cc (tc0, b0, &error0))
goto drop;
tc0->burst_acked = 0;
break;
@@ -2802,37 +2827,27 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (tcp_rcv_ack (wrk, tc0, b0, tcp0, &error0))
goto drop;
- if (tc0->flags & TCP_CONN_FINPNDG)
- {
- /* TX fifo finally drained */
- if (!transport_max_tx_dequeue (&tc0->connection))
- {
- tcp_send_fin (tc0);
- tcp_connection_timers_reset (tc0);
- tcp_connection_set_state (tc0, TCP_STATE_LAST_ACK);
- tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
- }
- }
+ if (!(tc0->flags & TCP_CONN_FINPNDG))
+ break;
+
+ /* Still have outstanding tx data */
+ if (transport_max_tx_dequeue (&tc0->connection))
+ break;
+
+ tcp_send_fin (tc0);
+ tcp_connection_timers_reset (tc0);
+ tcp_connection_set_state (tc0, TCP_STATE_LAST_ACK);
+ tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
break;
case TCP_STATE_CLOSING:
/* In addition to the processing for the ESTABLISHED state, if
* the ACK acknowledges our FIN then enter the TIME-WAIT state,
* otherwise ignore the segment. */
- if (!tcp_rcv_ack_is_acceptable (tc0, b0))
- {
- error0 = TCP_ERROR_ACK_INVALID;
- goto drop;
- }
+ if (tcp_rcv_ack_no_cc (tc0, b0, &error0))
+ goto drop;
- error0 = TCP_ERROR_ACK_OK;
- tc0->snd_una = vnet_buffer (b0)->tcp.ack_number;
- /* Ack moved snd_una beyond snd_nxt so reprogram fin */
- if (seq_gt (tc0->snd_una, tc0->snd_nxt))
- {
- tc0->snd_nxt = tc0->snd_una;
- tc0->flags &= ~TCP_CONN_FINSNT;
- goto drop;
- }
+ if (tc0->snd_una != tc0->snd_nxt)
+ goto drop;
tcp_connection_timers_reset (tc0);
tcp_connection_set_state (tc0, TCP_STATE_TIME_WAIT);
@@ -2845,13 +2860,9 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
* acknowledgment of our FIN. If our FIN is now acknowledged,
* delete the TCB, enter the CLOSED state, and return. */
- if (!tcp_rcv_ack_is_acceptable (tc0, b0))
- {
- error0 = TCP_ERROR_ACK_INVALID;
- goto drop;
- }
- error0 = TCP_ERROR_ACK_OK;
- tc0->snd_una = vnet_buffer (b0)->tcp.ack_number;
+ if (tcp_rcv_ack_no_cc (tc0, b0, &error0))
+ goto drop;
+
/* Apparently our ACK for the peer's FIN was lost */
if (is_fin0 && tc0->snd_una != tc0->snd_nxt)
{
@@ -2875,7 +2886,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
* retransmission of the remote FIN. Acknowledge it, and restart
* the 2 MSL timeout. */
- if (tcp_rcv_ack (wrk, tc0, b0, tcp0, &error0))
+ if (tcp_rcv_ack_no_cc (tc0, b0, &error0))
goto drop;
if (!is_fin0)
@@ -2943,26 +2954,17 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
break;
case TCP_STATE_FIN_WAIT_1:
tc0->rcv_nxt += 1;
- tcp_connection_set_state (tc0, TCP_STATE_CLOSING);
+ /* If data is outstanding stay in FIN_WAIT_1 and try to finish
+ * sending it. */
if (tc0->flags & TCP_CONN_FINPNDG)
{
- /* Drop all outstanding tx data. */
- session_tx_fifo_dequeue_drop (&tc0->connection,
- transport_max_tx_dequeue
- (&tc0->connection));
- /* Make it look as if we've recovered, if needed */
- if (tcp_in_cong_recovery (tc0))
- {
- scoreboard_clear (&tc0->sack_sb);
- tcp_fastrecovery_off (tc0);
- tcp_recovery_off (tc0);
- tcp_connection_timers_reset (tc0);
- tc0->snd_nxt = tc0->snd_una;
- }
- tcp_send_fin (tc0);
+ tc0->flags |= TCP_CONN_FINRCVD;
}
else
- tcp_program_ack (wrk, tc0);
+ {
+ tcp_connection_set_state (tc0, TCP_STATE_CLOSING);
+ tcp_program_ack (wrk, tc0);
+ }
/* Wait for ACK for our FIN but not forever */
tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
break;
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 518a80de65e..03caa075a41 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -1132,7 +1132,6 @@ tcp_session_push_header (transport_connection_t * tconn, vlib_buffer_t * b)
tcp_push_hdr_i (tc, b, tc->snd_nxt, /* compute opts */ 0, /* burst */ 1,
/* update_snd_nxt */ 1);
tc->snd_una_max = seq_max (tc->snd_nxt, tc->snd_una_max);
- ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd));
tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
/* If not tracking an ACK, start tracking */
if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))