aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/tcp/tcp_output.c
diff options
context:
space:
mode:
authorFlorin Coras <fcoras@cisco.com>2018-10-19 16:26:24 -0700
committerDamjan Marion <dmarion@me.com>2018-10-23 19:35:51 +0000
commitbf4d5ce58435d3f424749ff69650ea67ce778f04 (patch)
tree35470ff4780c23a60ff0bf8c2680c73be7f6978f /src/vnet/tcp/tcp_output.c
parentf87eb9b3722f38112de6bc823527438e2a521bc0 (diff)
tcp: fast retransmit improvements
Patch is too large to be ported to 18.10 just days before release. - handle fast retransmits outside of established node and limit the retransmit burst size to avoid tx losses and worsening congestion. - in the absance of a tx pacer, use slow start after fast retransmit exists - add fast retransmit heuristic that re-retries sending the first segment if everything else fails - fine tuning Change-Id: I84a2ab8fbba8b97f1d2b26584dc11a1e2c33c8d2 Signed-off-by: Florin Coras <fcoras@cisco.com>
Diffstat (limited to 'src/vnet/tcp/tcp_output.c')
-rw-r--r--src/vnet/tcp/tcp_output.c78
1 files changed, 57 insertions, 21 deletions
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index ed1c641d80f..2e6036b410a 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -1409,7 +1409,11 @@ tcp_rxt_timeout_cc (tcp_connection_t * tc)
/* Cleanly recover cc (also clears up fast retransmit) */
if (tcp_in_fastrecovery (tc))
- tcp_cc_fastrecovery_exit (tc);
+ {
+ /* TODO be less aggressive about this */
+ scoreboard_clear (&tc->sack_sb);
+ tcp_cc_fastrecovery_exit (tc);
+ }
/* Start again from the beginning */
tc->cc_algo->congestion (tc);
@@ -1487,6 +1491,8 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
/* First retransmit timeout */
if (tc->rto_boff == 1)
tcp_rxt_timeout_cc (tc);
+ else
+ scoreboard_clear (&tc->sack_sb);
/* If we've sent beyond snd_congestion, update it */
if (seq_gt (tc->snd_una_max, tc->snd_congestion))
@@ -1499,9 +1505,6 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
* shortfall */
n_bytes = tcp_prepare_retransmit_segment (tc, 0, tc->snd_mss, &b);
- /* TODO be less aggressive about this */
- scoreboard_clear (&tc->sack_sb);
-
if (n_bytes == 0)
{
tcp_retransmit_timer_force_update (tc);
@@ -1680,7 +1683,7 @@ tcp_timer_persist_handler (u32 index)
/**
* Retransmit first unacked segment
*/
-void
+int
tcp_retransmit_first_unacked (tcp_connection_t * tc)
{
vlib_main_t *vm = vlib_get_main ();
@@ -1691,20 +1694,23 @@ tcp_retransmit_first_unacked (tcp_connection_t * tc)
tc->snd_nxt = tc->snd_una;
TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 2);
+
n_bytes = tcp_prepare_retransmit_segment (tc, 0, tc->snd_mss, &b);
if (!n_bytes)
- return;
+ return -1;
+
bi = vlib_get_buffer_index (vm, b);
tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
-
tc->snd_nxt = old_snd_nxt;
+
+ return 0;
}
/**
* Do fast retransmit with SACKs
*/
-void
-tcp_fast_retransmit_sack (tcp_connection_t * tc)
+int
+tcp_fast_retransmit_sack (tcp_connection_t * tc, u32 burst_size)
{
vlib_main_t *vm = vlib_get_main ();
u32 n_written = 0, offset, max_bytes, n_segs = 0;
@@ -1720,13 +1726,16 @@ tcp_fast_retransmit_sack (tcp_connection_t * tc)
old_snd_nxt = tc->snd_nxt;
sb = &tc->sack_sb;
snd_space = tcp_available_cc_snd_space (tc);
+ hole = scoreboard_get_hole (sb, sb->cur_rxt_hole);
if (snd_space < tc->snd_mss)
- goto done;
+ {
+ tcp_program_fastretransmit (tc);
+ goto done;
+ }
TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 0);
- hole = scoreboard_get_hole (sb, sb->cur_rxt_hole);
- while (hole && snd_space > 0 && n_segs++ < VLIB_FRAME_SIZE)
+ while (snd_space > 0 && n_segs < burst_size)
{
hole = scoreboard_next_rxt_hole (sb, hole,
tcp_fastrecovery_sent_1_smss (tc),
@@ -1736,7 +1745,21 @@ tcp_fast_retransmit_sack (tcp_connection_t * tc)
if (!can_rescue || !(seq_lt (sb->rescue_rxt, tc->snd_una)
|| seq_gt (sb->rescue_rxt,
tc->snd_congestion)))
- break;
+ {
+ if (tcp_fastrecovery_first (tc))
+ break;
+
+ /* We tend to lose the first segment. Try re-resending
+ * it but only once and after we've tried everything */
+ hole = scoreboard_first_hole (sb);
+ if (hole && hole->start == tc->snd_una)
+ {
+ tcp_retransmit_first_unacked (tc);
+ tcp_fastrecovery_first_on (tc);
+ n_segs += 1;
+ }
+ break;
+ }
/* If rescue rxt undefined or less than snd_una then one segment of
* up to SMSS octets that MUST include the highest outstanding
@@ -1756,6 +1779,7 @@ tcp_fast_retransmit_sack (tcp_connection_t * tc)
bi = vlib_get_buffer_index (vm, b);
tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
+ n_segs += 1;
break;
}
@@ -1776,22 +1800,27 @@ tcp_fast_retransmit_sack (tcp_connection_t * tc)
tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
ASSERT (n_written <= snd_space);
snd_space -= n_written;
+ n_segs += 1;
}
+ if (hole)
+ tcp_program_fastretransmit (tc);
+
done:
/* If window allows, send 1 SMSS of new data */
tc->snd_nxt = old_snd_nxt;
+ return n_segs;
}
/**
* Fast retransmit without SACK info
*/
-void
-tcp_fast_retransmit_no_sack (tcp_connection_t * tc)
+int
+tcp_fast_retransmit_no_sack (tcp_connection_t * tc, u32 burst_size)
{
vlib_main_t *vm = vlib_get_main ();
u32 n_written = 0, offset = 0, bi, old_snd_nxt;
- int snd_space;
+ int snd_space, n_segs = 0;
vlib_buffer_t *b;
ASSERT (tcp_in_fastrecovery (tc));
@@ -1802,7 +1831,7 @@ tcp_fast_retransmit_no_sack (tcp_connection_t * tc)
tc->snd_nxt = tc->snd_una;
snd_space = tcp_available_cc_snd_space (tc);
- while (snd_space > 0)
+ while (snd_space > 0 && n_segs < burst_size)
{
offset += n_written;
n_written = tcp_prepare_retransmit_segment (tc, offset, snd_space, &b);
@@ -1814,22 +1843,29 @@ tcp_fast_retransmit_no_sack (tcp_connection_t * tc)
bi = vlib_get_buffer_index (vm, b);
tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
snd_space -= n_written;
+ n_segs += 1;
}
+ /* More data to resend */
+ if (seq_lt (tc->snd_nxt, tc->snd_congestion))
+ tcp_program_fastretransmit (tc);
+
/* Restore snd_nxt. If window allows, send 1 SMSS of new data */
tc->snd_nxt = old_snd_nxt;
+
+ return n_segs;
}
/**
* Do fast retransmit
*/
-void
-tcp_fast_retransmit (tcp_connection_t * tc)
+int
+tcp_fast_retransmit (tcp_connection_t * tc, u32 burst_size)
{
if (tcp_opts_sack_permitted (&tc->rcv_opts))
- tcp_fast_retransmit_sack (tc);
+ return tcp_fast_retransmit_sack (tc, burst_size);
else
- tcp_fast_retransmit_no_sack (tc);
+ return tcp_fast_retransmit_no_sack (tc, burst_size);
}
static u32