From 36ebcfffbc7ab0e83b4bb8dfaec16bf16cafb954 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Thu, 12 Sep 2019 18:36:44 -0700 Subject: tcp: use sacks for timer based recovery Type: feature If available, reuse sack scoreboard in timer triggered retransmit to minimize spurious retransmits. Additional changes/refactoring: - limited transmit updates - add sacked rxt count to scoreboard - prr pacing of fast retransmits - startup pacing updates - changed loss window to flight + mss Change-Id: I057de6a9d6401698bd1031d5cf5cfbb62f2bdf61 Signed-off-by: Florin Coras --- src/vnet/tcp/tcp.h | 52 +++++++++++++++++++++++++--------------------------- 1 file changed, 25 insertions(+), 27 deletions(-) (limited to 'src/vnet/tcp/tcp.h') diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 227b3f8a856..a1b7d4cbd0d 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -32,6 +32,7 @@ #define TCP_MAX_OPTION_SPACE 40 #define TCP_CC_DATA_SZ 24 #define TCP_MAX_GSO_SZ 65536 +#define TCP_RXT_MAX_BURST 10 #define TCP_DUPACK_THRESHOLD 3 #define TCP_IW_N_SEGMENTS 10 @@ -111,7 +112,7 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler; _(DCNT_PENDING, "Disconnect pending") \ _(HALF_OPEN_DONE, "Half-open completed") \ _(FINPNDG, "FIN pending") \ - _(FRXT_PENDING, "Fast-retransmit pending") \ + _(RXT_PENDING, "Retransmit pending") \ _(FRXT_FIRST, "Fast-retransmit first again") \ _(DEQ_PENDING, "Pending dequeue acked") \ _(PSH_PENDING, "PSH pending") \ @@ -166,6 +167,7 @@ typedef struct _sack_scoreboard u32 sacked_bytes; /**< Number of bytes sacked in sb */ u32 last_sacked_bytes; /**< Number of bytes last sacked */ u32 last_bytes_delivered; /**< Sack bytes delivered to app */ + u32 rxt_sacked; /**< Rxt last delivered */ u32 high_sacked; /**< Highest byte sacked (fack) */ u32 high_rxt; /**< Highest retransmitted sequence */ u32 rescue_rxt; /**< Rescue sequence number */ @@ -219,8 +221,11 @@ sack_scoreboard_hole_t *scoreboard_prev_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole); sack_scoreboard_hole_t *scoreboard_first_hole (sack_scoreboard_t * sb); sack_scoreboard_hole_t *scoreboard_last_hole (sack_scoreboard_t * sb); + void scoreboard_clear (sack_scoreboard_t * sb); +void scoreboard_clear_reneging (sack_scoreboard_t * sb, u32 start, u32 end); void scoreboard_init (sack_scoreboard_t * sb); +void scoreboard_init_high_rxt (sack_scoreboard_t * sb, u32 snd_una); u8 *format_tcp_scoreboard (u8 * s, va_list * args); #define TCP_BTS_INVALID_INDEX ((u32)~0) @@ -360,8 +365,10 @@ typedef struct _tcp_connection u32 prev_cwnd; /**< ssthresh before congestion */ u32 bytes_acked; /**< Bytes acknowledged by current segment */ u32 burst_acked; /**< Bytes acknowledged in current burst */ - u32 snd_rxt_bytes; /**< Retransmitted bytes */ + u32 snd_rxt_bytes; /**< Retransmitted bytes during current cc event */ u32 snd_rxt_ts; /**< Timestamp when first packet is retransmitted */ + u32 prr_delivered; /**< RFC6937 bytes delivered during current event */ + u32 rxt_delivered; /**< Rxt bytes delivered during current cc event */ u32 tsecr_last_ack; /**< Timestamp echoed to us in last healthy ACK */ u32 snd_congestion; /**< snd_una_max when congestion is detected */ u32 tx_fifo_size; /**< Tx fifo size. Used to constrain cwnd */ @@ -757,7 +764,7 @@ void tcp_send_window_update_ack (tcp_connection_t * tc); void tcp_program_ack (tcp_connection_t * tc); void tcp_program_dupack (tcp_connection_t * tc); -void tcp_program_fastretransmit (tcp_connection_t * tc); +void tcp_program_retransmit (tcp_connection_t * tc); /* * Rate estimation @@ -857,18 +864,9 @@ tcp_flight_size (const tcp_connection_t * tc) int flight_size; flight_size = (int) (tc->snd_nxt - tc->snd_una) - tcp_bytes_out (tc) - + tc->snd_rxt_bytes; + + tc->snd_rxt_bytes - tc->rxt_delivered; - if (flight_size < 0) - { - if (0) - clib_warning - ("Negative: %u %u %u dupacks %u sacked bytes %u flags %d", - tc->snd_una_max - tc->snd_una, tcp_bytes_out (tc), - tc->snd_rxt_bytes, tc->rcv_dupacks, tc->sack_sb.sacked_bytes, - tc->rcv_opts.flags); - return 0; - } + ASSERT (flight_size >= 0); return flight_size; } @@ -912,7 +910,8 @@ tcp_cwnd_accumulate (tcp_connection_t * tc, u32 thresh, u32 bytes) always_inline u32 tcp_loss_wnd (const tcp_connection_t * tc) { - return tc->snd_mss; + /* Whatever we have in flight + the packet we're about to send */ + return tcp_flight_size (tc) + tc->snd_mss; } always_inline u32 @@ -951,22 +950,14 @@ tcp_available_cc_snd_space (const tcp_connection_t * tc) always_inline u8 tcp_is_lost_fin (tcp_connection_t * tc) { - if ((tc->flags & TCP_CONN_FINSNT) && tc->snd_una_max - tc->snd_una == 1) + if ((tc->flags & TCP_CONN_FINSNT) && (tc->snd_una_max - tc->snd_una == 1)) return 1; return 0; } u32 tcp_snd_space (tcp_connection_t * tc); -int tcp_retransmit_first_unacked (tcp_worker_ctx_t * wrk, - tcp_connection_t * tc); -int tcp_fast_retransmit_no_sack (tcp_worker_ctx_t * wrk, - tcp_connection_t * tc, u32 burst_size); -int tcp_fast_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, - u32 burst_size); -int tcp_fast_retransmit (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, - u32 burst_size); -void tcp_cc_init_congestion (tcp_connection_t * tc); -void tcp_cc_fastrecovery_clear (tcp_connection_t * tc); +//void tcp_cc_init_congestion (tcp_connection_t * tc); +//void tcp_cc_fastrecovery_clear (tcp_connection_t * tc); fib_node_index_t tcp_lookup_rmt_in_fib (tcp_connection_t * tc); @@ -1035,6 +1026,12 @@ tcp_cc_rcv_cong_ack (tcp_connection_t * tc, tcp_cc_ack_t ack_type, tc->cc_algo->rcv_cong_ack (tc, ack_type, rs); } +static inline void +tcp_cc_congestion (tcp_connection_t * tc) +{ + tc->cc_algo->congestion (tc); +} + static inline void tcp_cc_loss (tcp_connection_t * tc) { @@ -1068,9 +1065,10 @@ tcp_cc_get_pacing_rate (tcp_connection_t * tc) return tc->cc_algo->get_pacing_rate (tc); f64 srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us); + /* TODO should constrain to interface's max throughput but * we don't have link speeds for sw ifs ..*/ - return (tc->cwnd / srtt); + return ((f64) tc->cwnd / srtt); } always_inline void -- cgit 1.2.3-korg