diff options
author | Florin Coras <fcoras@cisco.com> | 2019-09-12 18:36:44 -0700 |
---|---|---|
committer | John Lo <loj@cisco.com> | 2019-09-25 14:54:42 +0000 |
commit | 36ebcfffbc7ab0e83b4bb8dfaec16bf16cafb954 (patch) | |
tree | fe9803af08fd55cf1c9419d6e58f45394e155a7b /src/vnet | |
parent | 54c93cfc2556d9c6d2cf472f51d4c2866a556ef6 (diff) |
tcp: use sacks for timer based recovery
Type: feature
If available, reuse sack scoreboard in timer triggered retransmit to
minimize spurious retransmits.
Additional changes/refactoring:
- limited transmit updates
- add sacked rxt count to scoreboard
- prr pacing of fast retransmits
- startup pacing updates
- changed loss window to flight + mss
Change-Id: I057de6a9d6401698bd1031d5cf5cfbb62f2bdf61
Signed-off-by: Florin Coras <fcoras@cisco.com>
Diffstat (limited to 'src/vnet')
-rw-r--r-- | src/vnet/session/transport.c | 18 | ||||
-rw-r--r-- | src/vnet/session/transport.h | 9 | ||||
-rw-r--r-- | src/vnet/tcp/tcp.c | 45 | ||||
-rw-r--r-- | src/vnet/tcp/tcp.h | 52 | ||||
-rw-r--r-- | src/vnet/tcp/tcp_cubic.c | 8 | ||||
-rwxr-xr-x | src/vnet/tcp/tcp_input.c | 286 | ||||
-rw-r--r-- | src/vnet/tcp/tcp_newreno.c | 6 | ||||
-rw-r--r-- | src/vnet/tcp/tcp_output.c | 188 | ||||
-rw-r--r-- | src/vnet/tcp/tcp_packet.h | 3 |
9 files changed, 337 insertions, 278 deletions
diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c index effd5891e0b..3e723994752 100644 --- a/src/vnet/session/transport.c +++ b/src/vnet/session/transport.c @@ -578,7 +578,8 @@ spacer_max_burst (spacer_t * pacer, u64 norm_time_now) u64 n_periods = norm_time_now - pacer->last_update; u64 inc; - if (n_periods > 0 && (inc = n_periods * pacer->tokens_per_period) > 10) + if (n_periods > 0 + && (inc = (f32) n_periods * pacer->tokens_per_period) > 10) { pacer->last_update = norm_time_now; pacer->bucket = clib_min (pacer->bucket + inc, pacer->bytes_per_sec); @@ -608,6 +609,13 @@ spacer_pace_rate (spacer_t * pacer) return pacer->bytes_per_sec; } +static inline void +spacer_reset_bucket (spacer_t * pacer, u64 norm_time_now) +{ + pacer->last_update = norm_time_now; + pacer->bucket = 0; +} + void transport_connection_tx_pacer_reset (transport_connection_t * tc, u32 rate_bytes_per_sec, @@ -646,6 +654,14 @@ transport_connection_tx_pacer_burst (transport_connection_t * tc, return spacer_max_burst (&tc->pacer, time_now); } +void +transport_connection_tx_pacer_reset_bucket (transport_connection_t * tc, + u64 time_now) +{ + time_now >>= SPACER_CPU_TICKS_PER_PERIOD_SHIFT; + spacer_reset_bucket (&tc->pacer, time_now); +} + u32 transport_connection_snd_space (transport_connection_t * tc, u64 time_now, u16 mss) diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h index cbe3c36734c..5b45be0a0be 100644 --- a/src/vnet/session/transport.h +++ b/src/vnet/session/transport.h @@ -220,6 +220,15 @@ u32 transport_connection_tx_pacer_burst (transport_connection_t * tc, u64 transport_connection_tx_pacer_rate (transport_connection_t * tc); /** + * Reset tx pacer bucket + * + * @param tc transport connection + * @param time_now current cpu time + */ +void transport_connection_tx_pacer_reset_bucket (transport_connection_t * tc, + u64 time_now); + +/** * Initialize period for tx pacers * * Defines a unit of time with respect to number of cpu cycles that is to diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 12e241984cf..75a45a448bd 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -701,17 +701,15 @@ tcp_init_snd_vars (tcp_connection_t * tc) tc->snd_una = tc->iss; tc->snd_nxt = tc->iss + 1; tc->snd_una_max = tc->snd_nxt; - tc->srtt = 0; + tc->srtt = 100; /* 100 ms */ } void tcp_enable_pacing (tcp_connection_t * tc) { - u32 initial_bucket, byte_rate; - initial_bucket = 16 * tc->snd_mss; - byte_rate = 2 << 16; - transport_connection_tx_pacer_init (&tc->connection, byte_rate, - initial_bucket); + u32 byte_rate; + byte_rate = tc->cwnd / (tc->srtt * TCP_TICK); + transport_connection_tx_pacer_init (&tc->connection, byte_rate, tc->cwnd); tc->mrtt_us = (u32) ~ 0; } @@ -725,10 +723,11 @@ tcp_connection_init_vars (tcp_connection_t * tc) tcp_connection_timers_init (tc); tcp_init_mss (tc); scoreboard_init (&tc->sack_sb); - tcp_cc_init (tc); if (tc->state == TCP_STATE_SYN_RCVD) tcp_init_snd_vars (tc); + tcp_cc_init (tc); + if (!tc->c_is_ip4 && ip6_address_is_link_local_unicast (&tc->c_rmt_ip6)) tcp_add_del_adjacency (tc, 1); @@ -921,7 +920,7 @@ format_tcp_congestion (u8 * s, va_list * args) s = format (s, "%U ", format_tcp_congestion_status, tc); s = format (s, "algo %s cwnd %u ssthresh %u bytes_acked %u\n", tc->cc_algo->name, tc->cwnd, tc->ssthresh, tc->bytes_acked); - s = format (s, "%Ucc space %u prev_cwnd %u prev_ssthresh %u rtx_bytes %u\n", + s = format (s, "%Ucc space %u prev_cwnd %u prev_ssthresh %u rxt_bytes %u\n", format_white_space, indent, tcp_available_cc_snd_space (tc), tc->prev_cwnd, tc->prev_ssthresh, tc->snd_rxt_bytes); s = format (s, "%Usnd_congestion %u dupack %u limited_transmit %u\n", @@ -1141,8 +1140,9 @@ format_tcp_scoreboard (u8 * s, va_list * args) sack_scoreboard_hole_t *hole; u32 indent = format_get_indent (s); - s = format (s, "sacked_bytes %u last_sacked_bytes %u lost_bytes %u\n", - sb->sacked_bytes, sb->last_sacked_bytes, sb->lost_bytes); + s = format (s, "sacked %u last_sacked %u lost %u last_lost %u\n", + sb->sacked_bytes, sb->last_sacked_bytes, sb->lost_bytes, + sb->last_lost_bytes); s = format (s, "%Ulast_bytes_delivered %u high_sacked %u is_reneging %u\n", format_white_space, indent, sb->last_bytes_delivered, sb->high_sacked - tc->iss, sb->is_reneging); @@ -1247,7 +1247,7 @@ tcp_round_snd_space (tcp_connection_t * tc, u32 snd_space) static inline u32 tcp_snd_space_inline (tcp_connection_t * tc) { - int snd_space, snt_limited; + int snd_space; if (PREDICT_FALSE (tcp_in_fastrecovery (tc) || tc->state == TCP_STATE_CLOSED)) @@ -1255,18 +1255,21 @@ tcp_snd_space_inline (tcp_connection_t * tc) snd_space = tcp_available_output_snd_space (tc); - /* If we haven't gotten dupacks or if we did and have gotten sacked - * bytes then we can still send as per Limited Transmit (RFC3042) */ - if (PREDICT_FALSE (tc->rcv_dupacks != 0 - && (tcp_opts_sack_permitted (tc) - && tc->sack_sb.last_sacked_bytes == 0))) + /* If we got dupacks or sacked bytes but we're not yet in recovery, try + * to force the peer to send enough dupacks to start retransmitting as + * per Limited Transmit (RFC3042) + */ + if (PREDICT_FALSE (tc->rcv_dupacks != 0 || tc->sack_sb.sacked_bytes)) { - if (tc->rcv_dupacks == 1 && tc->limited_transmit != tc->snd_nxt) + if (tc->limited_transmit != tc->snd_nxt + && (seq_lt (tc->limited_transmit, tc->snd_nxt - 2 * tc->snd_mss) + || seq_gt (tc->limited_transmit, tc->snd_nxt))) tc->limited_transmit = tc->snd_nxt; + ASSERT (seq_leq (tc->limited_transmit, tc->snd_nxt)); - snt_limited = tc->snd_nxt - tc->limited_transmit; - snd_space = clib_max (2 * tc->snd_mss - snt_limited, 0); + int snt_limited = tc->snd_nxt - tc->limited_transmit; + snd_space = clib_max ((int) 2 * tc->snd_mss - snt_limited, 0); } return tcp_round_snd_space (tc, snd_space); } @@ -1360,9 +1363,9 @@ tcp_connection_tx_pacer_reset (tcp_connection_t * tc, u32 window, u32 start_bucket) { tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index); - u32 byte_rate = window / ((f64) TCP_TICK * tc->srtt); + f64 srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us); u64 last_time = wrk->vm->clib_time.last_cpu_time; - transport_connection_tx_pacer_reset (&tc->connection, byte_rate, + transport_connection_tx_pacer_reset (&tc->connection, window / srtt, start_bucket, last_time); } diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 227b3f8a856..a1b7d4cbd0d 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -32,6 +32,7 @@ #define TCP_MAX_OPTION_SPACE 40 #define TCP_CC_DATA_SZ 24 #define TCP_MAX_GSO_SZ 65536 +#define TCP_RXT_MAX_BURST 10 #define TCP_DUPACK_THRESHOLD 3 #define TCP_IW_N_SEGMENTS 10 @@ -111,7 +112,7 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler; _(DCNT_PENDING, "Disconnect pending") \ _(HALF_OPEN_DONE, "Half-open completed") \ _(FINPNDG, "FIN pending") \ - _(FRXT_PENDING, "Fast-retransmit pending") \ + _(RXT_PENDING, "Retransmit pending") \ _(FRXT_FIRST, "Fast-retransmit first again") \ _(DEQ_PENDING, "Pending dequeue acked") \ _(PSH_PENDING, "PSH pending") \ @@ -166,6 +167,7 @@ typedef struct _sack_scoreboard u32 sacked_bytes; /**< Number of bytes sacked in sb */ u32 last_sacked_bytes; /**< Number of bytes last sacked */ u32 last_bytes_delivered; /**< Sack bytes delivered to app */ + u32 rxt_sacked; /**< Rxt last delivered */ u32 high_sacked; /**< Highest byte sacked (fack) */ u32 high_rxt; /**< Highest retransmitted sequence */ u32 rescue_rxt; /**< Rescue sequence number */ @@ -219,8 +221,11 @@ sack_scoreboard_hole_t *scoreboard_prev_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole); sack_scoreboard_hole_t *scoreboard_first_hole (sack_scoreboard_t * sb); sack_scoreboard_hole_t *scoreboard_last_hole (sack_scoreboard_t * sb); + void scoreboard_clear (sack_scoreboard_t * sb); +void scoreboard_clear_reneging (sack_scoreboard_t * sb, u32 start, u32 end); void scoreboard_init (sack_scoreboard_t * sb); +void scoreboard_init_high_rxt (sack_scoreboard_t * sb, u32 snd_una); u8 *format_tcp_scoreboard (u8 * s, va_list * args); #define TCP_BTS_INVALID_INDEX ((u32)~0) @@ -360,8 +365,10 @@ typedef struct _tcp_connection u32 prev_cwnd; /**< ssthresh before congestion */ u32 bytes_acked; /**< Bytes acknowledged by current segment */ u32 burst_acked; /**< Bytes acknowledged in current burst */ - u32 snd_rxt_bytes; /**< Retransmitted bytes */ + u32 snd_rxt_bytes; /**< Retransmitted bytes during current cc event */ u32 snd_rxt_ts; /**< Timestamp when first packet is retransmitted */ + u32 prr_delivered; /**< RFC6937 bytes delivered during current event */ + u32 rxt_delivered; /**< Rxt bytes delivered during current cc event */ u32 tsecr_last_ack; /**< Timestamp echoed to us in last healthy ACK */ u32 snd_congestion; /**< snd_una_max when congestion is detected */ u32 tx_fifo_size; /**< Tx fifo size. Used to constrain cwnd */ @@ -757,7 +764,7 @@ void tcp_send_window_update_ack (tcp_connection_t * tc); void tcp_program_ack (tcp_connection_t * tc); void tcp_program_dupack (tcp_connection_t * tc); -void tcp_program_fastretransmit (tcp_connection_t * tc); +void tcp_program_retransmit (tcp_connection_t * tc); /* * Rate estimation @@ -857,18 +864,9 @@ tcp_flight_size (const tcp_connection_t * tc) int flight_size; flight_size = (int) (tc->snd_nxt - tc->snd_una) - tcp_bytes_out (tc) - + tc->snd_rxt_bytes; + + tc->snd_rxt_bytes - tc->rxt_delivered; - if (flight_size < 0) - { - if (0) - clib_warning - ("Negative: %u %u %u dupacks %u sacked bytes %u flags %d", - tc->snd_una_max - tc->snd_una, tcp_bytes_out (tc), - tc->snd_rxt_bytes, tc->rcv_dupacks, tc->sack_sb.sacked_bytes, - tc->rcv_opts.flags); - return 0; - } + ASSERT (flight_size >= 0); return flight_size; } @@ -912,7 +910,8 @@ tcp_cwnd_accumulate (tcp_connection_t * tc, u32 thresh, u32 bytes) always_inline u32 tcp_loss_wnd (const tcp_connection_t * tc) { - return tc->snd_mss; + /* Whatever we have in flight + the packet we're about to send */ + return tcp_flight_size (tc) + tc->snd_mss; } always_inline u32 @@ -951,22 +950,14 @@ tcp_available_cc_snd_space (const tcp_connection_t * tc) always_inline u8 tcp_is_lost_fin (tcp_connection_t * tc) { - if ((tc->flags & TCP_CONN_FINSNT) && tc->snd_una_max - tc->snd_una == 1) + if ((tc->flags & TCP_CONN_FINSNT) && (tc->snd_una_max - tc->snd_una == 1)) return 1; return 0; } u32 tcp_snd_space (tcp_connection_t * tc); -int tcp_retransmit_first_unacked (tcp_worker_ctx_t * wrk, - tcp_connection_t * tc); -int tcp_fast_retransmit_no_sack (tcp_worker_ctx_t * wrk, - tcp_connection_t * tc, u32 burst_size); -int tcp_fast_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, - u32 burst_size); -int tcp_fast_retransmit (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, - u32 burst_size); -void tcp_cc_init_congestion (tcp_connection_t * tc); -void tcp_cc_fastrecovery_clear (tcp_connection_t * tc); +//void tcp_cc_init_congestion (tcp_connection_t * tc); +//void tcp_cc_fastrecovery_clear (tcp_connection_t * tc); fib_node_index_t tcp_lookup_rmt_in_fib (tcp_connection_t * tc); @@ -1036,6 +1027,12 @@ tcp_cc_rcv_cong_ack (tcp_connection_t * tc, tcp_cc_ack_t ack_type, } static inline void +tcp_cc_congestion (tcp_connection_t * tc) +{ + tc->cc_algo->congestion (tc); +} + +static inline void tcp_cc_loss (tcp_connection_t * tc) { tc->cc_algo->loss (tc); @@ -1068,9 +1065,10 @@ tcp_cc_get_pacing_rate (tcp_connection_t * tc) return tc->cc_algo->get_pacing_rate (tc); f64 srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us); + /* TODO should constrain to interface's max throughput but * we don't have link speeds for sw ifs ..*/ - return (tc->cwnd / srtt); + return ((f64) tc->cwnd / srtt); } always_inline void diff --git a/src/vnet/tcp/tcp_cubic.c b/src/vnet/tcp/tcp_cubic.c index aa318961533..17f7932ea2e 100644 --- a/src/vnet/tcp/tcp_cubic.c +++ b/src/vnet/tcp/tcp_cubic.c @@ -103,10 +103,7 @@ cubic_congestion (tcp_connection_t * tc) cd->w_max = w_max; tc->ssthresh = clib_max (tc->cwnd * beta_cubic, 2 * tc->snd_mss); - tc->cwnd = tc->ssthresh; - if (!tcp_opts_sack_permitted (&tc->rcv_opts)) - tc->cwnd += 3 * tc->snd_mss; } static void @@ -114,11 +111,10 @@ cubic_loss (tcp_connection_t * tc) { cubic_data_t *cd = (cubic_data_t *) tcp_cc_data (tc); - tc->ssthresh = clib_max (tc->cwnd * beta_cubic, 2 * tc->snd_mss); tc->cwnd = tcp_loss_wnd (tc); cd->t_start = cubic_time (tc->c_thread_index); cd->K = 0; - cd->w_max = 0; + cd->w_max = tc->cwnd / tc->snd_mss; } static void @@ -159,7 +155,7 @@ cubic_rcv_ack (tcp_connection_t * tc, tcp_rate_sample_t * rs) if (tcp_in_slowstart (tc)) { - tc->cwnd += clib_min (tc->snd_mss, tc->bytes_acked); + tc->cwnd += tc->bytes_acked; return; } diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 933f913bc5a..b49b8e8fd77 100755 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -508,8 +508,6 @@ tcp_update_rtt (tcp_connection_t * tc, tcp_rate_sample_t * rs, u32 ack) mrtt = rs->rtt_time * THZ; goto estimate_rtt; } - if (tcp_in_recovery (tc)) - return 0; goto done; } @@ -784,9 +782,17 @@ scoreboard_insert_hole (sack_scoreboard_t * sb, u32 prev_index, return hole; } -#endif /* CLIB_MARCH_VARIANT */ -#ifndef CLIB_MARCH_VARIANT +always_inline void +scoreboard_update_sacked_rxt (sack_scoreboard_t * sb, u32 start, u32 end, + u8 has_rxt) +{ + if (!has_rxt || seq_geq (start, sb->high_rxt)) + return; + + sb->rxt_sacked += + seq_lt (end, sb->high_rxt) ? (end - start) : (sb->high_rxt - start); +} always_inline void scoreboard_update_bytes (sack_scoreboard_t * sb, u32 ack, u32 snd_mss) @@ -837,7 +843,7 @@ scoreboard_update_bytes (sack_scoreboard_t * sb, u32 ack, u32 snd_mss) while (right) { sb->lost_bytes += scoreboard_hole_bytes (right); - sb->last_lost_bytes += right->is_lost ? 0 : right->end - right->start; + sb->last_lost_bytes += right->is_lost ? 0 : (right->end - right->start); right->is_lost = 1; left = scoreboard_prev_hole (sb, right); if (!left) @@ -912,9 +918,8 @@ scoreboard_next_rxt_hole (sack_scoreboard_t * sb, return hole; } -#endif /* CLIB_MARCH_VARIANT */ -static void +void scoreboard_init_high_rxt (sack_scoreboard_t * sb, u32 snd_una) { sack_scoreboard_hole_t *hole; @@ -928,7 +933,6 @@ scoreboard_init_high_rxt (sack_scoreboard_t * sb, u32 snd_una) sb->rescue_rxt = snd_una - 1; } -#ifndef CLIB_MARCH_VARIANT void scoreboard_init (sack_scoreboard_t * sb) { @@ -957,6 +961,23 @@ scoreboard_clear (sack_scoreboard_t * sb) sb->cur_rxt_hole = TCP_INVALID_SACK_HOLE_INDEX; sb->is_reneging = 0; } + +void +scoreboard_clear_reneging (sack_scoreboard_t * sb, u32 start, u32 end) +{ + sack_scoreboard_hole_t *last_hole; + + clib_warning ("sack reneging"); + + scoreboard_clear (sb); + last_hole = scoreboard_insert_hole (sb, TCP_INVALID_SACK_HOLE_INDEX, + start, end); + last_hole->is_lost = 1; + sb->tail = scoreboard_hole_index (sb, last_hole); + sb->high_sacked = start; + scoreboard_init_high_rxt (sb, start); +} + #endif /* CLIB_MARCH_VARIANT */ /** @@ -983,14 +1004,18 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) sack_scoreboard_t *sb = &tc->sack_sb; sack_block_t *blk, *rcv_sacks; u32 blk_index = 0, i, j; + u8 has_rxt; sb->last_sacked_bytes = 0; sb->last_bytes_delivered = 0; + sb->rxt_sacked = 0; if (!tcp_opts_sack (&tc->rcv_opts) && sb->head == TCP_INVALID_SACK_HOLE_INDEX) return; + has_rxt = tcp_in_cong_recovery (tc); + /* Remove invalid blocks */ blk = tc->rcv_opts.sacks; while (blk < vec_end (tc->rcv_opts.sacks)) @@ -1121,6 +1146,8 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) sb->is_reneging = 0; } } + scoreboard_update_sacked_rxt (sb, hole->start, hole->end, + has_rxt); scoreboard_remove_hole (sb, hole); hole = next_hole; } @@ -1129,6 +1156,8 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) { if (seq_gt (blk->end, hole->start)) { + scoreboard_update_sacked_rxt (sb, hole->start, blk->end, + has_rxt); hole->start = blk->end; } blk_index++; @@ -1145,11 +1174,17 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) /* Pool might've moved */ hole = scoreboard_get_hole (sb, hole_index); hole->end = blk->start; + + scoreboard_update_sacked_rxt (sb, blk->start, blk->end, + has_rxt); + blk_index++; ASSERT (hole->next == scoreboard_hole_index (sb, next_hole)); } else if (seq_lt (blk->start, hole->end)) { + scoreboard_update_sacked_rxt (sb, blk->start, hole->end, + has_rxt); hole->end = blk->start; } hole = scoreboard_next_hole (sb, hole); @@ -1166,6 +1201,8 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) ASSERT (sb->head == TCP_INVALID_SACK_HOLE_INDEX || tcp_in_recovery (tc) || sb->is_reneging || sb->holes[sb->head].start == ack); ASSERT (sb->last_lost_bytes <= sb->lost_bytes); + ASSERT ((ack - tc->snd_una) + sb->last_sacked_bytes + - sb->last_bytes_delivered >= sb->rxt_sacked); TCP_EVT (TCP_EVT_CC_SCOREBOARD, tc); } @@ -1209,70 +1246,41 @@ tcp_update_snd_wnd (tcp_connection_t * tc, u32 seq, u32 ack, u32 snd_wnd) } } -#ifndef CLIB_MARCH_VARIANT /** * Init loss recovery/fast recovery. * * Triggered by dup acks as opposed to timer timeout. Note that cwnd is * updated in @ref tcp_cc_handle_event after fast retransmit */ -void +static void tcp_cc_init_congestion (tcp_connection_t * tc) { tcp_fastrecovery_on (tc); tc->snd_congestion = tc->snd_nxt; tc->cwnd_acc_bytes = 0; tc->snd_rxt_bytes = 0; + tc->rxt_delivered = 0; + tc->prr_delivered = 0; tc->prev_ssthresh = tc->ssthresh; tc->prev_cwnd = tc->cwnd; - tc->cc_algo->congestion (tc); - tc->fr_occurences += 1; - TCP_EVT (TCP_EVT_CC_EVT, tc, 4); -} -#endif /* CLIB_MARCH_VARIANT */ -static void -tcp_cc_recovery_exit (tcp_connection_t * tc) -{ - tc->rto_boff = 0; - tcp_update_rto (tc); - tc->snd_rxt_ts = 0; - tc->rtt_ts = 0; - tcp_recovery_off (tc); - TCP_EVT (TCP_EVT_CC_EVT, tc, 3); -} + tcp_cc_congestion (tc); -#ifndef CLIB_MARCH_VARIANT -void -tcp_cc_fastrecovery_clear (tcp_connection_t * tc) -{ - tc->snd_rxt_bytes = 0; - tc->rcv_dupacks = 0; - tc->rtt_ts = 0; + /* Post retransmit update cwnd to ssthresh and account for the + * three segments that have left the network and should've been + * buffered at the receiver XXX */ + if (!tcp_opts_sack_permitted (&tc->rcv_opts)) + tc->cwnd += 3 * tc->snd_mss; - tcp_fastrecovery_off (tc); - tcp_fastrecovery_first_off (tc); - tc->flags &= ~TCP_CONN_FRXT_PENDING; - - TCP_EVT (TCP_EVT_CC_EVT, tc, 3); + tc->fr_occurences += 1; + TCP_EVT (TCP_EVT_CC_EVT, tc, 4); } -#endif /* CLIB_MARCH_VARIANT */ static void tcp_cc_congestion_undo (tcp_connection_t * tc) { tc->cwnd = tc->prev_cwnd; tc->ssthresh = tc->prev_ssthresh; - tc->rcv_dupacks = 0; - if (tcp_in_recovery (tc)) - { - tcp_cc_recovery_exit (tc); - tc->snd_nxt = seq_max (tc->snd_nxt, tc->snd_congestion); - } - else if (tcp_in_fastrecovery (tc)) - { - tcp_cc_fastrecovery_clear (tc); - } tcp_cc_undo_recovery (tc); ASSERT (tc->rto_boff == 0); TCP_EVT (TCP_EVT_CC_EVT, tc, 5); @@ -1288,48 +1296,74 @@ tcp_cc_is_spurious_timeout_rxt (tcp_connection_t * tc) } static inline u8 -tcp_cc_is_spurious_fast_rxt (tcp_connection_t * tc) +tcp_cc_is_spurious_retransmit (tcp_connection_t * tc) { - return (tcp_in_fastrecovery (tc) - && tc->cwnd > tc->ssthresh + 3 * tc->snd_mss); + return (tcp_cc_is_spurious_timeout_rxt (tc)); } -static u8 -tcp_cc_is_spurious_retransmit (tcp_connection_t * tc) +static inline u8 +tcp_should_fastrecover_sack (tcp_connection_t * tc) +{ + return (tc->sack_sb.lost_bytes + || ((TCP_DUPACK_THRESHOLD - 1) * tc->snd_mss + < tc->sack_sb.sacked_bytes)); +} + +static inline u8 +tcp_should_fastrecover (tcp_connection_t * tc) { - return (tcp_cc_is_spurious_timeout_rxt (tc) - || tcp_cc_is_spurious_fast_rxt (tc)); + return ((tc->rcv_dupacks == TCP_DUPACK_THRESHOLD) + || tcp_should_fastrecover_sack (tc)); } static int tcp_cc_recover (tcp_connection_t * tc) { sack_scoreboard_hole_t *hole; + u8 is_spurious = 0; ASSERT (tcp_in_cong_recovery (tc)); - hole = scoreboard_first_hole (&tc->sack_sb); - if (hole && hole->start == tc->snd_una && hole->end == tc->snd_nxt) - scoreboard_clear (&tc->sack_sb); - if (tcp_cc_is_spurious_retransmit (tc)) { tcp_cc_congestion_undo (tc); - return 1; + is_spurious = 1; } - if (tcp_in_recovery (tc)) - tcp_cc_recovery_exit (tc); - else if (tcp_in_fastrecovery (tc)) + tc->prr_delivered = 0; + tc->rxt_delivered = 0; + tc->snd_rxt_bytes = 0; + tc->snd_rxt_ts = 0; + tc->rtt_ts = 0; + tc->flags &= ~TCP_CONN_RXT_PENDING; + + /* Previous recovery left us congested. Continue sending as part + * of the current recovery event with an updated snd_congestion */ + if (tc->sack_sb.sacked_bytes) { - tcp_cc_recovered (tc); - tcp_cc_fastrecovery_clear (tc); + tc->snd_congestion = tc->snd_nxt; + if (tcp_opts_sack_permitted (&tc->rcv_opts)) + scoreboard_init_high_rxt (&tc->sack_sb, tc->snd_una); + tcp_program_retransmit (tc); + return is_spurious; } + hole = scoreboard_first_hole (&tc->sack_sb); + if (hole && hole->start == tc->snd_una && hole->end == tc->snd_nxt) + scoreboard_clear (&tc->sack_sb); + + if (!tcp_in_recovery (tc) && !is_spurious) + tcp_cc_recovered (tc); + + tcp_fastrecovery_off (tc); + tcp_fastrecovery_first_off (tc); + tcp_recovery_off (tc); + TCP_EVT (TCP_EVT_CC_EVT, tc, 3); + ASSERT (tc->rto_boff == 0); ASSERT (!tcp_in_cong_recovery (tc)); ASSERT (tcp_scoreboard_is_sane_post_recovery (tc)); - return 0; + return is_spurious; } static void @@ -1353,17 +1387,28 @@ tcp_cc_update (tcp_connection_t * tc, tcp_rate_sample_t * rs) tc->snd_congestion = tc->snd_una - 1; } -static u8 -tcp_should_fastrecover_sack (tcp_connection_t * tc) +static void +tcp_update_delivered (tcp_connection_t * tc, u8 is_dack, u8 has_sack) { - return (TCP_DUPACK_THRESHOLD - 1) * tc->snd_mss < tc->sack_sb.sacked_bytes; -} + if (has_sack) + { + ASSERT (tc->bytes_acked >= tc->sack_sb.last_bytes_delivered + || (tc->flags & TCP_CONN_FINSNT)); -static u8 -tcp_should_fastrecover (tcp_connection_t * tc) -{ - return (tc->rcv_dupacks == TCP_DUPACK_THRESHOLD - || tcp_should_fastrecover_sack (tc)); + tc->rxt_delivered += tc->sack_sb.rxt_sacked; + tc->prr_delivered += tc->bytes_acked + tc->sack_sb.last_sacked_bytes + - tc->sack_sb.last_bytes_delivered; + } + else + { + tcp_fastrecovery_first_on (tc); + tc->rxt_delivered = clib_max (tc->rxt_delivered + tc->bytes_acked, + tc->snd_rxt_bytes); + if (is_dack) + tc->prr_delivered += is_dack; + else + tc->prr_delivered += tc->bytes_acked - tc->snd_mss * tc->rcv_dupacks; + } } /** @@ -1380,29 +1425,21 @@ tcp_cc_handle_event (tcp_connection_t * tc, tcp_rate_sample_t * rs, * and accounting for segments that have left the network are done * lower. */ - if (tcp_in_fastrecovery (tc)) + if (tcp_in_cong_recovery (tc)) { if (!has_sack) - tc->rcv_dupacks++; + tc->rcv_dupacks += is_dack; if (!tc->bytes_acked) { - tcp_program_fastretransmit (tc); + tcp_update_delivered (tc, is_dack, has_sack); + tcp_program_retransmit (tc); if (!has_sack) tcp_cc_rcv_cong_ack (tc, TCP_CC_DUPACK, rs); return; } } /* - * In timer triggered recovery - */ - else if (tcp_in_recovery (tc)) - { - /* No fast recovery entry at this point */ - if (!tc->bytes_acked) - return; - } - /* * Duplicate ACK. Check if we should enter fast recovery */ else if (is_dack) @@ -1410,33 +1447,24 @@ tcp_cc_handle_event (tcp_connection_t * tc, tcp_rate_sample_t * rs, TCP_EVT (TCP_EVT_DUPACK_RCVD, tc, 1); ASSERT (tc->snd_una != tc->snd_nxt || tc->sack_sb.last_sacked_bytes); + /* Heuristic to catch potential late dupacks */ + if (!tc->sack_sb.sacked_bytes && tc->snd_una == tc->snd_congestion + && timestamp_leq (tc->rcv_opts.tsecr, tc->tsecr_last_ack)) + return; + tc->rcv_dupacks++; if (tcp_should_fastrecover (tc)) { - u32 pacer_wnd; - ASSERT (!tcp_in_fastrecovery (tc)); - /* Heuristic to catch potential late dupacks - * after fast retransmit exits */ - if (is_dack && tc->snd_una == tc->snd_congestion - && timestamp_leq (tc->rcv_opts.tsecr, tc->tsecr_last_ack)) - { - tc->rcv_dupacks = 0; - return; - } - tcp_cc_init_congestion (tc); if (has_sack) scoreboard_init_high_rxt (&tc->sack_sb, tc->snd_una); - /* Constrain rate until we get a partial ack */ - pacer_wnd = clib_max (0.1 * tc->cwnd, 2 * tc->snd_mss); - tcp_connection_tx_pacer_reset (tc, pacer_wnd, - 0 /* start bucket */ ); - tcp_program_fastretransmit (tc); + tcp_connection_tx_pacer_reset (tc, tc->cwnd, 0 /* start bucket */ ); + tcp_program_retransmit (tc); return; } else @@ -1475,6 +1503,10 @@ tcp_cc_handle_event (tcp_connection_t * tc, tcp_rate_sample_t * rs, * Legitimate ACK. 1) See if we can exit recovery */ + /* RFC6675: If the incoming ACK is a cumulative acknowledgment, + * reset dupacks to 0. Also needed if in congestion recovery */ + tc->rcv_dupacks = 0; + if (seq_geq (tc->snd_una, tc->snd_congestion)) { /* If spurious return, we've already updated everything */ @@ -1493,55 +1525,18 @@ tcp_cc_handle_event (tcp_connection_t * tc, tcp_rate_sample_t * rs, * Legitimate ACK. 2) If PARTIAL ACK try to retransmit */ - /* RFC6675: If the incoming ACK is a cumulative acknowledgment, - * reset dupacks to 0. Also needed if in congestion recovery */ - tc->rcv_dupacks = 0; - - /* Post RTO timeout don't try anything fancy */ - if (tcp_in_recovery (tc)) - { - tcp_cc_rcv_ack (tc, rs); - transport_add_tx_event (&tc->connection); - return; - } - /* Remove retransmitted bytes that have been delivered */ - if (has_sack) - { - ASSERT (tc->bytes_acked >= tc->sack_sb.last_bytes_delivered - || (tc->flags & TCP_CONN_FINSNT)); + tcp_update_delivered (tc, is_dack, has_sack); - /* If we have sacks and we haven't gotten an ack beyond high_rxt, - * remove sacked bytes delivered */ - if (seq_lt (tc->snd_una, tc->sack_sb.high_rxt)) - { - u32 rxt_delivered; - rxt_delivered = tc->bytes_acked - tc->sack_sb.last_bytes_delivered; - ASSERT (tc->snd_rxt_bytes >= rxt_delivered); - tc->snd_rxt_bytes -= rxt_delivered; - } - else - { - /* Apparently all retransmitted holes have been acked */ - tc->snd_rxt_bytes = 0; - tc->sack_sb.high_rxt = tc->snd_una; - } - } + if (tcp_in_recovery (tc)) + tcp_cc_rcv_ack (tc, rs); else - { - tcp_fastrecovery_first_on (tc); - if (tc->snd_rxt_bytes > tc->bytes_acked) - tc->snd_rxt_bytes -= tc->bytes_acked; - else - tc->snd_rxt_bytes = 0; - } - - tcp_cc_rcv_cong_ack (tc, TCP_CC_PARTIALACK, rs); + tcp_cc_rcv_cong_ack (tc, TCP_CC_PARTIALACK, rs); /* * Since this was a partial ack, try to retransmit some more data */ - tcp_program_fastretransmit (tc); + tcp_program_retransmit (tc); } /** @@ -2767,6 +2762,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* Update rtt and rto */ tcp_estimate_initial_rtt (tc0); + tcp_connection_tx_pacer_update (tc0); /* Switch state to ESTABLISHED */ tc0->state = TCP_STATE_ESTABLISHED; diff --git a/src/vnet/tcp/tcp_newreno.c b/src/vnet/tcp/tcp_newreno.c index 8b704a6ba04..e9213a2e6d9 100644 --- a/src/vnet/tcp/tcp_newreno.c +++ b/src/vnet/tcp/tcp_newreno.c @@ -20,17 +20,11 @@ newreno_congestion (tcp_connection_t * tc) { tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss); tc->cwnd = tc->ssthresh; - /* Post retransmit update cwnd to ssthresh and account for the - * three segments that have left the network and should've been - * buffered at the receiver XXX */ - if (!tcp_opts_sack_permitted (&tc->rcv_opts)) - tc->cwnd += 3 * tc->snd_mss; } static void newreno_loss (tcp_connection_t * tc) { - tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss); tc->cwnd = tcp_loss_wnd (tc); } diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 15aa85712e3..069b823ee36 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -1159,12 +1159,12 @@ tcp_program_dupack (tcp_connection_t * tc) } void -tcp_program_fastretransmit (tcp_connection_t * tc) +tcp_program_retransmit (tcp_connection_t * tc) { - if (!(tc->flags & TCP_CONN_FRXT_PENDING)) + if (!(tc->flags & TCP_CONN_RXT_PENDING)) { session_add_self_custom_tx_evt (&tc->connection, 0); - tc->flags |= TCP_CONN_FRXT_PENDING; + tc->flags |= TCP_CONN_RXT_PENDING; } } @@ -1369,19 +1369,31 @@ tcp_prepare_retransmit_segment (tcp_worker_ctx_t * wrk, if (!n_bytes) return 0; - if (tcp_in_fastrecovery (tc)) - { - tc->snd_rxt_bytes += n_bytes; - if (tc->flags & TCP_CONN_RATE_SAMPLE) - tcp_bt_track_rxt (tc, start, start + n_bytes); - } + tc->snd_rxt_bytes += n_bytes; + + if (tc->flags & TCP_CONN_RATE_SAMPLE) + tcp_bt_track_rxt (tc, start, start + n_bytes); tc->bytes_retrans += n_bytes; tc->segs_retrans += 1; TCP_EVT (TCP_EVT_CC_RTX, tc, offset, n_bytes); + return n_bytes; } +static void +tcp_check_sack_reneging (tcp_connection_t * tc) +{ + sack_scoreboard_t *sb = &tc->sack_sb; + sack_scoreboard_hole_t *hole; + + hole = scoreboard_first_hole (sb); + if (!sb->is_reneging && (!hole || hole->start == tc->snd_una)) + return; + + scoreboard_clear_reneging (sb, tc->snd_una, tc->snd_nxt); +} + /** * Reset congestion control, switch cwnd to loss window and try again. */ @@ -1389,23 +1401,21 @@ static void tcp_cc_init_rxt_timeout (tcp_connection_t * tc) { TCP_EVT (TCP_EVT_CC_EVT, tc, 6); + tc->prev_ssthresh = tc->ssthresh; tc->prev_cwnd = tc->cwnd; - /* Clear fast recovery state if needed */ - if (tcp_in_fastrecovery (tc)) - tcp_cc_fastrecovery_clear (tc); + /* If we entrered loss without fast recovery, notify cc algo of the + * congestion event such that it can update ssthresh and its state */ + if (!tcp_in_fastrecovery (tc)) + tcp_cc_congestion (tc); - /* Let cc algo decide loss cwnd and ssthresh */ + /* Let cc algo decide loss cwnd and ssthresh post unrecovered loss */ tcp_cc_loss (tc); - /* Start again from the beginning */ - tc->snd_congestion = tc->snd_nxt; - tc->rcv_dupacks = 0; tc->rtt_ts = 0; tc->cwnd_acc_bytes = 0; tc->tr_occurences += 1; - tcp_connection_tx_pacer_reset (tc, tc->cwnd, 2 * tc->snd_mss); tcp_recovery_on (tc); } @@ -1476,29 +1486,14 @@ tcp_timer_retransmit_handler (u32 tc_index) return; } - /* Increment RTO backoff (also equal to number of retries) and go back - * to first un-acked byte */ - tc->rto_boff += 1; + if (tcp_opts_sack_permitted (&tc->rcv_opts)) + tcp_check_sack_reneging (tc); - /* TODO be less aggressive about clearing scoreboard */ - scoreboard_clear (&tc->sack_sb); + /* Update send congestion to make sure that rxt has data to send */ + tc->snd_congestion = tc->snd_nxt; - /* First retransmit timeout */ - if (tc->rto_boff == 1) - { - tcp_cc_init_rxt_timeout (tc); - /* Record timestamp. Eifel detection algorithm RFC3522 */ - tc->snd_rxt_ts = tcp_tstamp (tc); - } - - if (tc->flags & TCP_CONN_RATE_SAMPLE) - tcp_bt_flush_samples (tc); - - /* If we've sent beyond snd_congestion, update it */ - tc->snd_congestion = seq_max (tc->snd_nxt, tc->snd_congestion); - tc->snd_nxt = tc->snd_una; - - /* Send one segment. n_bytes may be zero due to buffer shortfall */ + /* Send the first unacked segment. If we're short on buffers, return + * as soon as possible */ n_bytes = tcp_prepare_retransmit_segment (wrk, tc, 0, tc->snd_mss, &b); if (!n_bytes) { @@ -1511,6 +1506,19 @@ tcp_timer_retransmit_handler (u32 tc_index) tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); tcp_retransmit_timer_force_update (tc); + + tc->rto_boff += 1; + if (tc->rto_boff == 1) + { + tcp_cc_init_rxt_timeout (tc); + /* Record timestamp. Eifel detection algorithm RFC3522 */ + tc->snd_rxt_ts = tcp_tstamp (tc); + } + + if (tcp_opts_sack_permitted (&tc->rcv_opts)) + scoreboard_init_high_rxt (&tc->sack_sb, tc->snd_una + tc->snd_mss); + + tcp_program_retransmit (tc); } /* Retransmit SYN-ACK */ else if (tc->state == TCP_STATE_SYN_RCVD) @@ -1728,8 +1736,8 @@ tcp_retransmit_first_unacked (tcp_worker_ctx_t * wrk, tcp_connection_t * tc) } static int -tcp_fast_retransmit_unsent (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, - u32 burst_size) +tcp_transmit_unsent (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, + u32 burst_size) { u32 offset, n_segs = 0, n_written, bi, available_wnd; vlib_main_t *vm = wrk->vm; @@ -1758,18 +1766,44 @@ done: return n_segs; } +/** + * Estimate send space using proportional rate reduction (RFC6937) + */ +static int +tcp_fastrecovery_prr_snd_space (tcp_connection_t * tc) +{ + u32 pipe, prr_out; + int space; + + pipe = tcp_flight_size (tc); + prr_out = tc->snd_rxt_bytes + (tc->snd_nxt - tc->snd_congestion); + + if (pipe > tc->ssthresh) + { + space = ((int) tc->prr_delivered * ((f64) tc->ssthresh / tc->prev_cwnd)) + - prr_out; + } + else + { + int limit = tc->prr_delivered - prr_out + tc->snd_mss; + space = clib_min (tc->ssthresh - pipe, limit); + } + space = clib_max (space, prr_out ? 0 : tc->snd_mss); + return space; +} + #define scoreboard_rescue_rxt_valid(_sb, _tc) \ (seq_geq (_sb->rescue_rxt, _tc->snd_una) \ && seq_leq (_sb->rescue_rxt, _tc->snd_congestion)) /** - * Do fast retransmit with SACKs + * Do retransmit with SACKs */ -int -tcp_fast_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, - u32 burst_size) +static int +tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, + u32 burst_size) { - u32 n_written = 0, offset, max_bytes, n_segs = 0, n_segs_now; + u32 n_written = 0, offset, max_bytes, n_segs = 0; sack_scoreboard_hole_t *hole; vlib_main_t *vm = wrk->vm; vlib_buffer_t *b = 0; @@ -1778,12 +1812,19 @@ tcp_fast_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, int snd_space; u8 snd_limited = 0, can_rescue = 0; - ASSERT (tcp_in_fastrecovery (tc)); + ASSERT (tcp_in_cong_recovery (tc)); + + if (tcp_in_recovery (tc)) + snd_space = tcp_available_cc_snd_space (tc); + else + snd_space = tcp_fastrecovery_prr_snd_space (tc); - snd_space = tcp_available_cc_snd_space (tc); if (snd_space < tc->snd_mss) { - tcp_program_fastretransmit (tc); + /* We're cc constrained so don't accumulate tokens */ + transport_connection_tx_pacer_reset_bucket (&tc->connection, + vm-> + clib_time.last_cpu_time); return 0; } @@ -1800,19 +1841,26 @@ tcp_fast_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, &snd_limited); if (!hole) { + /* We are out of lost holes to retransmit so send some new data. */ if (max_deq) { + u32 n_segs_new, av_window; + av_window = tc->snd_wnd - (tc->snd_nxt - tc->snd_una); + snd_space = clib_min (snd_space, av_window); snd_space = clib_min (max_deq, snd_space); burst_size = clib_min (burst_size - n_segs, snd_space / tc->snd_mss); - n_segs_now = tcp_fast_retransmit_unsent (wrk, tc, burst_size); - if (max_deq > n_segs_now * tc->snd_mss) - tcp_program_fastretransmit (tc); - n_segs += n_segs_now; + burst_size = clib_min (burst_size, TCP_RXT_MAX_BURST); + n_segs_new = tcp_transmit_unsent (wrk, tc, burst_size); + if (max_deq > n_segs_new * tc->snd_mss) + tcp_program_retransmit (tc); + + n_segs += n_segs_new; goto done; } - if (!can_rescue || scoreboard_rescue_rxt_valid (sb, tc)) + if (tcp_in_recovery (tc) || !can_rescue + || scoreboard_rescue_rxt_valid (sb, tc)) break; /* If rescue rxt undefined or less than snd_una then one segment of @@ -1859,18 +1907,19 @@ tcp_fast_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, } if (hole) - tcp_program_fastretransmit (tc); + tcp_program_retransmit (tc); done: + return n_segs; } /** * Fast retransmit without SACK info */ -int -tcp_fast_retransmit_no_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, - u32 burst_size) +static int +tcp_retransmit_no_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, + u32 burst_size) { u32 n_written = 0, offset = 0, bi, max_deq, n_segs_now; vlib_main_t *vm = wrk->vm; @@ -1918,9 +1967,9 @@ send_unsent: { snd_space = clib_min (max_deq, snd_space); burst_size = clib_min (burst_size - n_segs, snd_space / tc->snd_mss); - n_segs_now = tcp_fast_retransmit_unsent (wrk, tc, burst_size); + n_segs_now = tcp_transmit_unsent (wrk, tc, burst_size); if (max_deq > n_segs_now * tc->snd_mss) - tcp_program_fastretransmit (tc); + tcp_program_retransmit (tc); n_segs += n_segs_now; } @@ -1932,14 +1981,13 @@ done: /** * Do fast retransmit */ -int -tcp_fast_retransmit (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, - u32 burst_size) +static int +tcp_retransmit (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, u32 burst_size) { if (tcp_opts_sack_permitted (&tc->rcv_opts)) - return tcp_fast_retransmit_sack (wrk, tc, burst_size); + return tcp_retransmit_sack (wrk, tc, burst_size); else - return tcp_fast_retransmit_no_sack (wrk, tc, burst_size); + return tcp_retransmit_no_sack (wrk, tc, burst_size); } static int @@ -1991,7 +2039,7 @@ tcp_send_acks (tcp_connection_t * tc, u32 max_burst_size) } static int -tcp_do_fastretransmit (tcp_connection_t * tc, u32 max_burst_size) +tcp_do_retransmit (tcp_connection_t * tc, u32 max_burst_size) { u32 n_segs = 0, burst_size, sent_bytes, burst_bytes; tcp_worker_ctx_t *wrk; @@ -2003,11 +2051,11 @@ tcp_do_fastretransmit (tcp_connection_t * tc, u32 max_burst_size) burst_size = clib_min (max_burst_size, burst_bytes / tc->snd_mss); if (!burst_size) { - tcp_program_fastretransmit (tc); + tcp_program_retransmit (tc); return 0; } - n_segs = tcp_fast_retransmit (wrk, tc, burst_size); + n_segs = tcp_retransmit (wrk, tc, burst_size); sent_bytes = clib_min (n_segs * tc->snd_mss, burst_bytes); transport_connection_tx_pacer_update_bytes (&tc->connection, sent_bytes); return n_segs; @@ -2019,10 +2067,10 @@ tcp_session_custom_tx (void *conn, u32 max_burst_size) tcp_connection_t *tc = (tcp_connection_t *) conn; u32 n_segs = 0; - if (tcp_in_fastrecovery (tc) && (tc->flags & TCP_CONN_FRXT_PENDING)) + if (tcp_in_cong_recovery (tc) && (tc->flags & TCP_CONN_RXT_PENDING)) { - tc->flags &= ~TCP_CONN_FRXT_PENDING; - n_segs = tcp_do_fastretransmit (tc, max_burst_size); + tc->flags &= ~TCP_CONN_RXT_PENDING; + n_segs = tcp_do_retransmit (tc, max_burst_size); max_burst_size -= n_segs; } diff --git a/src/vnet/tcp/tcp_packet.h b/src/vnet/tcp/tcp_packet.h index 18b70ebb1ad..1e637a83271 100644 --- a/src/vnet/tcp/tcp_packet.h +++ b/src/vnet/tcp/tcp_packet.h @@ -143,9 +143,8 @@ typedef struct _sack_block typedef struct { u8 flags; /** Option flags, see above */ - - u16 mss; /**< Maximum segment size advertised */ u8 wscale; /**< Window scale advertised */ + u16 mss; /**< Maximum segment size advertised */ u32 tsval; /**< Timestamp value */ u32 tsecr; /**< Echoed/reflected time stamp */ sack_block_t *sacks; /**< SACK blocks */ |