diff options
-rwxr-xr-x | src/vnet/tcp/tcp_debug.h | 108 | ||||
-rw-r--r-- | src/vnet/tcp/tcp_input.c | 52 | ||||
-rw-r--r-- | src/vnet/tcp/tcp_output.c | 21 |
3 files changed, 100 insertions, 81 deletions
diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h index 4bc6b42e297..eb318cde4b9 100755 --- a/src/vnet/tcp/tcp_debug.h +++ b/src/vnet/tcp/tcp_debug.h @@ -20,7 +20,7 @@ #define TCP_DEBUG (1) #define TCP_DEBUG_SM (0) -#define TCP_DEBUG_CC (1) +#define TCP_DEBUG_CC (0) #define TCP_DEBUG_CC_STAT (1) #define foreach_tcp_dbg_evt \ @@ -411,21 +411,6 @@ typedef enum _tcp_dbg_evt ed->data[4] = _tc->snd_wnd; \ } -#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...) \ -{ \ - ELOG_TYPE_DECLARE (_e) = \ - { \ - .format = "dack-tx: rcv_nxt %u rcv_wnd %u snd_nxt %u av_wnd %u snd_wnd %u",\ - .format_args = "i4i4i4i4i4", \ - }; \ - DECLARE_ETD(_tc, _e, 5); \ - ed->data[0] = _tc->rcv_nxt - _tc->irs; \ - ed->data[1] = _tc->rcv_wnd; \ - ed->data[2] = _tc->snd_nxt - _tc->iss; \ - ed->data[3] = tcp_available_snd_wnd(_tc); \ - ed->data[4] = _tc->snd_wnd; \ -} - #define TCP_EVT_ACK_RCVD_HANDLER(_tc, ...) \ { \ ELOG_TYPE_DECLARE (_e) = \ @@ -441,21 +426,6 @@ typedef enum _tcp_dbg_evt ed->data[4] = tcp_flight_size(_tc); \ } -#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...) \ -{ \ - ELOG_TYPE_DECLARE (_e) = \ - { \ - .format = "dack-rx: snd_una %u cwnd %u snd_wnd %u flight %u rcv_wnd %u",\ - .format_args = "i4i4i4i4i4", \ - }; \ - DECLARE_ETD(_tc, _e, 5); \ - ed->data[0] = _tc->snd_una - _tc->iss; \ - ed->data[1] = _tc->cwnd; \ - ed->data[2] = _tc->snd_wnd; \ - ed->data[3] = tcp_flight_size(_tc); \ - ed->data[4] = _tc->rcv_wnd; \ -} - #define TCP_EVT_PKTIZE_HANDLER(_tc, ...) \ { \ ELOG_TYPE_DECLARE (_e) = \ @@ -601,9 +571,7 @@ if (_av > 0) \ } #else #define TCP_EVT_ACK_SENT_HANDLER(_tc, ...) -#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...) #define TCP_EVT_ACK_RCVD_HANDLER(_tc, ...) -#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...) #define TCP_EVT_PKTIZE_HANDLER(_tc, ...) #define TCP_EVT_INPUT_HANDLER(_tc, _type, _len, _written, ...) #define TCP_EVT_TIMER_POP_HANDLER(_tc_index, _timer_id, ...) @@ -649,19 +617,6 @@ if (_av > 0) \ */ #if TCP_DEBUG_CC -#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...) \ -{ \ - ELOG_TYPE_DECLARE (_e) = \ - { \ - .format = "rxt: snd_nxt %u offset %u snd %u rxt %u", \ - .format_args = "i4i4i4i4", \ - }; \ - DECLARE_ETD(_tc, _e, 4); \ - ed->data[0] = _tc->snd_nxt - _tc->iss; \ - ed->data[1] = offset; \ - ed->data[2] = n_bytes; \ - ed->data[3] = _tc->snd_rxt_bytes; \ -} #define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...) \ { \ @@ -669,13 +624,14 @@ if (_av > 0) \ { \ .format = "cc: %s wnd %u snd_cong %u rxt_bytes %u", \ .format_args = "t4i4i4i4", \ - .n_enum_strings = 5, \ + .n_enum_strings = 6, \ .enum_strings = { \ "fast-rxt", \ "rxt-timeout", \ "first-rxt", \ "recovered", \ "congestion", \ + "undo", \ }, \ }; \ DECLARE_ETD(_tc, _e, 4); \ @@ -685,6 +641,50 @@ if (_av > 0) \ ed->data[3] = _tc->snd_rxt_bytes; \ } +#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...) \ +{ \ + ELOG_TYPE_DECLARE (_e) = \ + { \ + .format = "rxt: snd_nxt %u offset %u snd %u rxt %u", \ + .format_args = "i4i4i4i4", \ + }; \ + DECLARE_ETD(_tc, _e, 4); \ + ed->data[0] = _tc->snd_nxt - _tc->iss; \ + ed->data[1] = offset; \ + ed->data[2] = n_bytes; \ + ed->data[3] = _tc->snd_rxt_bytes; \ +} + +#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...) \ +{ \ + ELOG_TYPE_DECLARE (_e) = \ + { \ + .format = "dack-tx: rcv_nxt %u rcv_wnd %u snd_nxt %u av_wnd %u snd_wnd %u",\ + .format_args = "i4i4i4i4i4", \ + }; \ + DECLARE_ETD(_tc, _e, 5); \ + ed->data[0] = _tc->rcv_nxt - _tc->irs; \ + ed->data[1] = _tc->rcv_wnd; \ + ed->data[2] = _tc->snd_nxt - _tc->iss; \ + ed->data[3] = tcp_available_snd_wnd(_tc); \ + ed->data[4] = _tc->snd_wnd; \ +} + +#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...) \ +{ \ + ELOG_TYPE_DECLARE (_e) = \ + { \ + .format = "dack-rx: snd_una %u cwnd %u snd_wnd %u flight %u rcv_wnd %u",\ + .format_args = "i4i4i4i4i4", \ + }; \ + DECLARE_ETD(_tc, _e, 5); \ + ed->data[0] = _tc->snd_una - _tc->iss; \ + ed->data[1] = _tc->cwnd; \ + ed->data[2] = _tc->snd_wnd; \ + ed->data[3] = tcp_flight_size(_tc); \ + ed->data[4] = _tc->rcv_wnd; \ +} + #define TCP_EVT_CC_PACK_HANDLER(_tc, ...) \ { \ ELOG_TYPE_DECLARE (_e) = \ @@ -696,6 +696,13 @@ if (_av > 0) \ ed->data[0] = _tc->snd_una - _tc->iss; \ ed->data[1] = _tc->snd_una_max - _tc->iss; \ } +#else +#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...) +#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...) +#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...) +#define TCP_EVT_CC_PACK_HANDLER(_tc, ...) +#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...) +#endif /* * Congestion control stats @@ -744,13 +751,6 @@ if (_tc->c_cc_stat_tstamp + STATS_INTERVAL < tcp_time_now()) \ #define TCP_EVT_CC_STAT_HANDLER(_tc, ...) #endif -#else -#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...) -#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...) -#define TCP_EVT_CC_PACK_HANDLER(_tc, ...) -#define TCP_EVT_CC_STAT_HANDLER(_tc, ...) -#endif - #endif /* SRC_VNET_TCP_TCP_DEBUG_H_ */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index bd57eca3223..0a36d063482 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -248,8 +248,8 @@ tcp_update_timestamp (tcp_connection_t * tc, u32 seq, u32 seq_end) * then the TSval from the segment is copied to TS.Recent; * otherwise, the TSval is ignored. */ - if (tcp_opts_tstamp (&tc->rcv_opts) && tc->tsval_recent - && seq_leq (seq, tc->rcv_las) && seq_leq (tc->rcv_las, seq_end)) + if (tcp_opts_tstamp (&tc->rcv_opts) && seq_leq (seq, tc->rcv_las) + && seq_leq (tc->rcv_las, seq_end)) { ASSERT (timestamp_leq (tc->tsval_recent, tc->rcv_opts.tsval)); tc->tsval_recent = tc->rcv_opts.tsval; @@ -418,51 +418,53 @@ tcp_update_rto (tcp_connection_t * tc) tc->rto = clib_max (tc->rto, TCP_RTO_MIN); } -/** Update RTT estimate and RTO timer +/** + * Update RTT estimate and RTO timer * * Measure RTT: We have two sources of RTT measurements: TSOPT and ACK * timing. Middle boxes are known to fiddle with TCP options so we * should give higher priority to ACK timing. * + * This should be called only if previously sent bytes have been acked. + * * return 1 if valid rtt 0 otherwise */ static int tcp_update_rtt (tcp_connection_t * tc, u32 ack) { u32 mrtt = 0; - u8 rtx_acked; - - /* Determine if only rtx bytes are acked. */ - rtx_acked = tcp_in_cong_recovery (tc) || !tc->bytes_acked; /* Karn's rule, part 1. Don't use retransmitted segments to estimate * RTT because they're ambiguous. */ - if (tc->rtt_ts && seq_geq (ack, tc->rtt_seq) && !rtx_acked) + if (tcp_in_cong_recovery (tc) || tc->sack_sb.sacked_bytes) + goto done; + + if (tc->rtt_ts && seq_geq (ack, tc->rtt_seq)) { mrtt = tcp_time_now () - tc->rtt_ts; } /* As per RFC7323 TSecr can be used for RTTM only if the segment advances * snd_una, i.e., the left side of the send window: - * seq_lt (tc->snd_una, ack). */ - else if (tcp_opts_tstamp (&tc->rcv_opts) && tc->rcv_opts.tsecr - && tc->bytes_acked) + * seq_lt (tc->snd_una, ack). This is a condition for calling update_rtt */ + else if (tcp_opts_tstamp (&tc->rcv_opts) && tc->rcv_opts.tsecr) { mrtt = tcp_time_now () - tc->rcv_opts.tsecr; } - /* Allow measuring of a new RTT */ - tc->rtt_ts = 0; - - /* If ACK moves left side of the wnd make sure boff is 0, even if mrtt is - * not valid */ - if (tc->bytes_acked) - tc->rto_boff = 0; - /* Ignore dubious measurements */ if (mrtt == 0 || mrtt > TCP_RTT_MAX) - return 0; + goto done; tcp_estimate_rtt (tc, mrtt); + +done: + + /* Allow measuring of a new RTT */ + tc->rtt_ts = 0; + + /* If we got here something must've been ACKed so make sure boff is 0, + * even if mrrt is not valid since we update the rto lower */ + tc->rto_boff = 0; tcp_update_rto (tc); return 0; @@ -932,10 +934,11 @@ static void tcp_cc_recovery_exit (tcp_connection_t * tc) { /* Deflate rto */ - tcp_update_rto (tc); tc->rto_boff = 0; + tcp_update_rto (tc); tc->snd_rxt_ts = 0; tcp_recovery_off (tc); + TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3); } void @@ -946,6 +949,7 @@ tcp_cc_fastrecovery_exit (tcp_connection_t * tc) tc->rcv_dupacks = 0; tcp_fastrecovery_off (tc); tcp_fastrecovery_1_smss_off (tc); + TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3); } static void @@ -958,13 +962,14 @@ tcp_cc_congestion_undo (tcp_connection_t * tc) if (tcp_in_recovery (tc)) tcp_cc_recovery_exit (tc); ASSERT (tc->rto_boff == 0); + TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 5); /* TODO extend for fastrecovery */ } static u8 tcp_cc_is_spurious_retransmit (tcp_connection_t * tc) { - return (tcp_in_recovery (tc) + return (tcp_in_recovery (tc) && tc->rto_boff == 1 && tc->snd_rxt_ts && tcp_opts_tstamp (&tc->rcv_opts) && timestamp_lt (tc->rcv_opts.tsecr, tc->snd_rxt_ts)); @@ -988,7 +993,6 @@ tcp_cc_recover (tcp_connection_t * tc) ASSERT (tc->rto_boff == 0); ASSERT (!tcp_in_cong_recovery (tc)); ASSERT (tcp_scoreboard_is_sane_post_recovery (tc)); - TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3); return 0; } @@ -2115,7 +2119,6 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, new_tc0->flags |= TCP_CONN_SNDACK; /* Update rtt with the syn-ack sample */ - new_tc0->bytes_acked = 1; tcp_update_rtt (new_tc0, vnet_buffer (b0)->tcp.ack_number); TCP_EVT_DBG (TCP_EVT_SYNACK_RCVD, new_tc0); } @@ -2352,7 +2355,6 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } /* Update rtt and rto */ - tc0->bytes_acked = 1; tcp_update_rtt (tc0, vnet_buffer (b0)->tcp.ack_number); /* Switch state to ESTABLISHED */ diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index be29f05f65c..cb1fcc9ab49 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -918,7 +918,24 @@ tcp_send_reset (tcp_connection_t * tc) opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts); ASSERT (opts_write_len == tc->snd_opts_len); vnet_buffer (b)->tcp.connection_index = tc->c_c_index; - tcp_enqueue_to_output_now (vm, b, bi, tc->c_is_ip4); + if (tc->c_is_ip4) + { + ip4_header_t *ih4; + ih4 = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip.ip4, + &tc->c_rmt_ip.ip4, IP_PROTOCOL_TCP, 0); + th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4); + } + else + { + int bogus = ~0; + ip6_header_t *ih6; + ih6 = vlib_buffer_push_ip6 (vm, b, &tc->c_lcl_ip.ip6, + &tc->c_rmt_ip.ip6, IP_PROTOCOL_TCP); + th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus); + ASSERT (!bogus); + } + tcp_enqueue_to_ip_lookup_now (vm, b, bi, tc->c_is_ip4); + TCP_EVT_DBG (TCP_EVT_RST_SENT, tc); } void @@ -1324,7 +1341,7 @@ tcp_rtx_timeout_cc (tcp_connection_t * tc) tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss); tc->cwnd = tcp_loss_wnd (tc); tc->snd_congestion = tc->snd_una_max; - + tc->rtt_ts = 0; tcp_recovery_on (tc); } |