From b3dce89a768aaffa2a830ba6579cd3d9c8cd967a Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Wed, 30 Oct 2019 09:22:14 -0700 Subject: tcp: improve lost rxt heuristic Type: feature - retransmit first unacked segment if newer retransmitted packets are acked - avoid spurious retransmits if recovery ends with sacked bytes Change-Id: Ic1b56d22e025822edb7609afb136e47440ea6032 Signed-off-by: Florin Coras --- src/vnet/tcp/tcp.h | 10 +++++-- src/vnet/tcp/tcp_input.c | 74 +++++++++++++++++++++++++++-------------------- src/vnet/tcp/tcp_output.c | 5 ++++ 3 files changed, 55 insertions(+), 34 deletions(-) diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index f40388721b6..7dd88bf0a49 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -1172,8 +1172,14 @@ tcp_persist_timer_set (tcp_connection_t * tc) always_inline void tcp_persist_timer_update (tcp_connection_t * tc) { - tcp_timer_update (tc, TCP_TIMER_PERSIST, - clib_max (tc->rto * TCP_TO_TIMER_TICK, 1)); + u32 interval; + + if (seq_leq (tc->snd_una, tc->snd_congestion + tc->burst_acked)) + interval = 1; + else + interval = clib_max (tc->rto * TCP_TO_TIMER_TICK, 1); + + tcp_timer_update (tc, TCP_TIMER_PERSIST, interval); } always_inline void diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index bc78b39cb52..172dcd2ee6f 100755 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -578,10 +578,16 @@ tcp_estimate_initial_rtt (tcp_connection_t * tc) always_inline u8 tcp_recovery_no_snd_space (tcp_connection_t * tc) { - return (tcp_in_fastrecovery (tc) - && tcp_fastrecovery_prr_snd_space (tc) < tc->snd_mss) - || (tcp_in_recovery (tc) - && tcp_available_output_snd_space (tc) < tc->snd_mss); + u32 space; + + ASSERT (tcp_in_cong_recovery (tc)); + + if (tcp_in_recovery (tc)) + space = tcp_available_output_snd_space (tc); + else + space = tcp_fastrecovery_prr_snd_space (tc); + + return (space < tc->snd_mss + tc->burst_acked); } /** @@ -608,7 +614,6 @@ tcp_handle_postponed_dequeues (tcp_worker_ctx_t * wrk) { /* Dequeue the newly ACKed bytes */ session_tx_fifo_dequeue_drop (&tc->connection, tc->burst_acked); - tc->burst_acked = 0; tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una); if (PREDICT_FALSE (tc->flags & TCP_CONN_PSH_PENDING)) @@ -628,9 +633,11 @@ tcp_handle_postponed_dequeues (tcp_worker_ctx_t * wrk) /* Reset the pacer if we've been idle, i.e., no data sent or if * we're in recovery and snd space constrained */ if (tc->data_segs_out == tc->prev_dsegs_out - || tcp_recovery_no_snd_space (tc)) + || (tcp_in_cong_recovery (tc) && tcp_recovery_no_snd_space (tc))) transport_connection_tx_pacer_reset_bucket (&tc->connection); + tc->prev_dsegs_out = tc->data_segs_out; + tc->burst_acked = 0; } _vec_len (wrk->pending_deq_acked) = 0; } @@ -1348,28 +1355,25 @@ tcp_cc_recover (tcp_connection_t * tc) is_spurious = 1; } - tc->rcv_dupacks = 0; - tc->prr_delivered = 0; - tc->rxt_delivered = 0; - tc->snd_rxt_bytes = 0; - tc->snd_rxt_ts = 0; - tc->rtt_ts = 0; - tc->flags &= ~TCP_CONN_RXT_PENDING; - tcp_connection_tx_pacer_reset (tc, tc->cwnd, 0 /* start bucket */ ); + tc->rcv_dupacks = 0; /* Previous recovery left us congested. Continue sending as part * of the current recovery event with an updated snd_congestion */ if (tc->sack_sb.sacked_bytes) { tc->snd_congestion = tc->snd_nxt; - tc->snd_rxt_ts = tcp_tstamp (tc); - tc->prr_start = tc->snd_una; - scoreboard_init_rxt (&tc->sack_sb, tc->snd_una); tcp_program_retransmit (tc); return is_spurious; } + tc->rxt_delivered = 0; + tc->snd_rxt_bytes = 0; + tc->snd_rxt_ts = 0; + tc->prr_delivered = 0; + tc->rtt_ts = 0; + tc->flags &= ~TCP_CONN_RXT_PENDING; + hole = scoreboard_first_hole (&tc->sack_sb); if (hole && hole->start == tc->snd_una && hole->end == tc->snd_nxt) scoreboard_clear (&tc->sack_sb); @@ -1444,29 +1448,18 @@ tcp_cc_handle_event (tcp_connection_t * tc, tcp_rate_sample_t * rs, } /* - * Already in recovery. See if we can exit and stop retransmitting + * Already in recovery */ - if (seq_geq (tc->snd_una, tc->snd_congestion)) - { - /* If spurious return, we've already updated everything */ - if (tcp_cc_recover (tc)) - { - tc->tsecr_last_ack = tc->rcv_opts.tsecr; - return; - } - - /* Treat as congestion avoidance ack */ - tcp_cc_rcv_ack (tc, rs); - return; - } - /* * Process (re)transmit feedback. Output path uses this to decide how much * more data to release into the network */ if (has_sack) { + if (!tc->bytes_acked && tc->sack_sb.rxt_sacked) + tcp_fastrecovery_first_on (tc); + tc->rxt_delivered += tc->sack_sb.rxt_sacked; tc->prr_delivered += tc->bytes_acked + tc->sack_sb.last_sacked_bytes - tc->sack_sb.last_bytes_delivered; @@ -1497,6 +1490,23 @@ tcp_cc_handle_event (tcp_connection_t * tc, tcp_rate_sample_t * rs, tcp_program_retransmit (tc); } + /* + * See if we can exit and stop retransmitting + */ + if (seq_geq (tc->snd_una, tc->snd_congestion)) + { + /* If spurious return, we've already updated everything */ + if (tcp_cc_recover (tc)) + { + tc->tsecr_last_ack = tc->rcv_opts.tsecr; + return; + } + + /* Treat as congestion avoidance ack */ + tcp_cc_rcv_ack (tc, rs); + return; + } + /* * Notify cc of the event */ diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 42986112053..7be3de8e26b 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -1850,6 +1850,9 @@ tcp_retransmit_should_retry_head (tcp_connection_t * tc, u32 tx_adv_sack = sb->high_sacked - tc->snd_congestion; f64 rr = (f64) tc->ssthresh / tc->prev_cwnd; + if (tcp_fastrecovery_first (tc)) + return 1; + return (tx_adv_sack > (tc->snd_una - tc->prr_start) * rr); } @@ -1928,6 +1931,8 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, ASSERT (tc->rxt_delivered <= tc->snd_rxt_bytes); } + tcp_fastrecovery_first_off (tc); + TCP_EVT (TCP_EVT_CC_EVT, tc, 0); hole = scoreboard_get_hole (sb, sb->cur_rxt_hole); -- cgit 1.2.3-korg