aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xsrc/vnet/tcp/tcp_debug.h108
-rw-r--r--src/vnet/tcp/tcp_input.c52
-rw-r--r--src/vnet/tcp/tcp_output.c21
3 files changed, 100 insertions, 81 deletions
diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h
index 4bc6b42e297..eb318cde4b9 100755
--- a/src/vnet/tcp/tcp_debug.h
+++ b/src/vnet/tcp/tcp_debug.h
@@ -20,7 +20,7 @@
#define TCP_DEBUG (1)
#define TCP_DEBUG_SM (0)
-#define TCP_DEBUG_CC (1)
+#define TCP_DEBUG_CC (0)
#define TCP_DEBUG_CC_STAT (1)
#define foreach_tcp_dbg_evt \
@@ -411,21 +411,6 @@ typedef enum _tcp_dbg_evt
ed->data[4] = _tc->snd_wnd; \
}
-#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...) \
-{ \
- ELOG_TYPE_DECLARE (_e) = \
- { \
- .format = "dack-tx: rcv_nxt %u rcv_wnd %u snd_nxt %u av_wnd %u snd_wnd %u",\
- .format_args = "i4i4i4i4i4", \
- }; \
- DECLARE_ETD(_tc, _e, 5); \
- ed->data[0] = _tc->rcv_nxt - _tc->irs; \
- ed->data[1] = _tc->rcv_wnd; \
- ed->data[2] = _tc->snd_nxt - _tc->iss; \
- ed->data[3] = tcp_available_snd_wnd(_tc); \
- ed->data[4] = _tc->snd_wnd; \
-}
-
#define TCP_EVT_ACK_RCVD_HANDLER(_tc, ...) \
{ \
ELOG_TYPE_DECLARE (_e) = \
@@ -441,21 +426,6 @@ typedef enum _tcp_dbg_evt
ed->data[4] = tcp_flight_size(_tc); \
}
-#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...) \
-{ \
- ELOG_TYPE_DECLARE (_e) = \
- { \
- .format = "dack-rx: snd_una %u cwnd %u snd_wnd %u flight %u rcv_wnd %u",\
- .format_args = "i4i4i4i4i4", \
- }; \
- DECLARE_ETD(_tc, _e, 5); \
- ed->data[0] = _tc->snd_una - _tc->iss; \
- ed->data[1] = _tc->cwnd; \
- ed->data[2] = _tc->snd_wnd; \
- ed->data[3] = tcp_flight_size(_tc); \
- ed->data[4] = _tc->rcv_wnd; \
-}
-
#define TCP_EVT_PKTIZE_HANDLER(_tc, ...) \
{ \
ELOG_TYPE_DECLARE (_e) = \
@@ -601,9 +571,7 @@ if (_av > 0) \
}
#else
#define TCP_EVT_ACK_SENT_HANDLER(_tc, ...)
-#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...)
#define TCP_EVT_ACK_RCVD_HANDLER(_tc, ...)
-#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...)
#define TCP_EVT_PKTIZE_HANDLER(_tc, ...)
#define TCP_EVT_INPUT_HANDLER(_tc, _type, _len, _written, ...)
#define TCP_EVT_TIMER_POP_HANDLER(_tc_index, _timer_id, ...)
@@ -649,19 +617,6 @@ if (_av > 0) \
*/
#if TCP_DEBUG_CC
-#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...) \
-{ \
- ELOG_TYPE_DECLARE (_e) = \
- { \
- .format = "rxt: snd_nxt %u offset %u snd %u rxt %u", \
- .format_args = "i4i4i4i4", \
- }; \
- DECLARE_ETD(_tc, _e, 4); \
- ed->data[0] = _tc->snd_nxt - _tc->iss; \
- ed->data[1] = offset; \
- ed->data[2] = n_bytes; \
- ed->data[3] = _tc->snd_rxt_bytes; \
-}
#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...) \
{ \
@@ -669,13 +624,14 @@ if (_av > 0) \
{ \
.format = "cc: %s wnd %u snd_cong %u rxt_bytes %u", \
.format_args = "t4i4i4i4", \
- .n_enum_strings = 5, \
+ .n_enum_strings = 6, \
.enum_strings = { \
"fast-rxt", \
"rxt-timeout", \
"first-rxt", \
"recovered", \
"congestion", \
+ "undo", \
}, \
}; \
DECLARE_ETD(_tc, _e, 4); \
@@ -685,6 +641,50 @@ if (_av > 0) \
ed->data[3] = _tc->snd_rxt_bytes; \
}
+#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "rxt: snd_nxt %u offset %u snd %u rxt %u", \
+ .format_args = "i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 4); \
+ ed->data[0] = _tc->snd_nxt - _tc->iss; \
+ ed->data[1] = offset; \
+ ed->data[2] = n_bytes; \
+ ed->data[3] = _tc->snd_rxt_bytes; \
+}
+
+#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "dack-tx: rcv_nxt %u rcv_wnd %u snd_nxt %u av_wnd %u snd_wnd %u",\
+ .format_args = "i4i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _tc->rcv_nxt - _tc->irs; \
+ ed->data[1] = _tc->rcv_wnd; \
+ ed->data[2] = _tc->snd_nxt - _tc->iss; \
+ ed->data[3] = tcp_available_snd_wnd(_tc); \
+ ed->data[4] = _tc->snd_wnd; \
+}
+
+#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "dack-rx: snd_una %u cwnd %u snd_wnd %u flight %u rcv_wnd %u",\
+ .format_args = "i4i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _tc->snd_una - _tc->iss; \
+ ed->data[1] = _tc->cwnd; \
+ ed->data[2] = _tc->snd_wnd; \
+ ed->data[3] = tcp_flight_size(_tc); \
+ ed->data[4] = _tc->rcv_wnd; \
+}
+
#define TCP_EVT_CC_PACK_HANDLER(_tc, ...) \
{ \
ELOG_TYPE_DECLARE (_e) = \
@@ -696,6 +696,13 @@ if (_av > 0) \
ed->data[0] = _tc->snd_una - _tc->iss; \
ed->data[1] = _tc->snd_una_max - _tc->iss; \
}
+#else
+#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...)
+#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...)
+#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...)
+#define TCP_EVT_CC_PACK_HANDLER(_tc, ...)
+#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...)
+#endif
/*
* Congestion control stats
@@ -744,13 +751,6 @@ if (_tc->c_cc_stat_tstamp + STATS_INTERVAL < tcp_time_now()) \
#define TCP_EVT_CC_STAT_HANDLER(_tc, ...)
#endif
-#else
-#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...)
-#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...)
-#define TCP_EVT_CC_PACK_HANDLER(_tc, ...)
-#define TCP_EVT_CC_STAT_HANDLER(_tc, ...)
-#endif
-
#endif /* SRC_VNET_TCP_TCP_DEBUG_H_ */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index bd57eca3223..0a36d063482 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -248,8 +248,8 @@ tcp_update_timestamp (tcp_connection_t * tc, u32 seq, u32 seq_end)
* then the TSval from the segment is copied to TS.Recent;
* otherwise, the TSval is ignored.
*/
- if (tcp_opts_tstamp (&tc->rcv_opts) && tc->tsval_recent
- && seq_leq (seq, tc->rcv_las) && seq_leq (tc->rcv_las, seq_end))
+ if (tcp_opts_tstamp (&tc->rcv_opts) && seq_leq (seq, tc->rcv_las)
+ && seq_leq (tc->rcv_las, seq_end))
{
ASSERT (timestamp_leq (tc->tsval_recent, tc->rcv_opts.tsval));
tc->tsval_recent = tc->rcv_opts.tsval;
@@ -418,51 +418,53 @@ tcp_update_rto (tcp_connection_t * tc)
tc->rto = clib_max (tc->rto, TCP_RTO_MIN);
}
-/** Update RTT estimate and RTO timer
+/**
+ * Update RTT estimate and RTO timer
*
* Measure RTT: We have two sources of RTT measurements: TSOPT and ACK
* timing. Middle boxes are known to fiddle with TCP options so we
* should give higher priority to ACK timing.
*
+ * This should be called only if previously sent bytes have been acked.
+ *
* return 1 if valid rtt 0 otherwise
*/
static int
tcp_update_rtt (tcp_connection_t * tc, u32 ack)
{
u32 mrtt = 0;
- u8 rtx_acked;
-
- /* Determine if only rtx bytes are acked. */
- rtx_acked = tcp_in_cong_recovery (tc) || !tc->bytes_acked;
/* Karn's rule, part 1. Don't use retransmitted segments to estimate
* RTT because they're ambiguous. */
- if (tc->rtt_ts && seq_geq (ack, tc->rtt_seq) && !rtx_acked)
+ if (tcp_in_cong_recovery (tc) || tc->sack_sb.sacked_bytes)
+ goto done;
+
+ if (tc->rtt_ts && seq_geq (ack, tc->rtt_seq))
{
mrtt = tcp_time_now () - tc->rtt_ts;
}
/* As per RFC7323 TSecr can be used for RTTM only if the segment advances
* snd_una, i.e., the left side of the send window:
- * seq_lt (tc->snd_una, ack). */
- else if (tcp_opts_tstamp (&tc->rcv_opts) && tc->rcv_opts.tsecr
- && tc->bytes_acked)
+ * seq_lt (tc->snd_una, ack). This is a condition for calling update_rtt */
+ else if (tcp_opts_tstamp (&tc->rcv_opts) && tc->rcv_opts.tsecr)
{
mrtt = tcp_time_now () - tc->rcv_opts.tsecr;
}
- /* Allow measuring of a new RTT */
- tc->rtt_ts = 0;
-
- /* If ACK moves left side of the wnd make sure boff is 0, even if mrtt is
- * not valid */
- if (tc->bytes_acked)
- tc->rto_boff = 0;
-
/* Ignore dubious measurements */
if (mrtt == 0 || mrtt > TCP_RTT_MAX)
- return 0;
+ goto done;
tcp_estimate_rtt (tc, mrtt);
+
+done:
+
+ /* Allow measuring of a new RTT */
+ tc->rtt_ts = 0;
+
+ /* If we got here something must've been ACKed so make sure boff is 0,
+ * even if mrrt is not valid since we update the rto lower */
+ tc->rto_boff = 0;
tcp_update_rto (tc);
return 0;
@@ -932,10 +934,11 @@ static void
tcp_cc_recovery_exit (tcp_connection_t * tc)
{
/* Deflate rto */
- tcp_update_rto (tc);
tc->rto_boff = 0;
+ tcp_update_rto (tc);
tc->snd_rxt_ts = 0;
tcp_recovery_off (tc);
+ TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
}
void
@@ -946,6 +949,7 @@ tcp_cc_fastrecovery_exit (tcp_connection_t * tc)
tc->rcv_dupacks = 0;
tcp_fastrecovery_off (tc);
tcp_fastrecovery_1_smss_off (tc);
+ TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
}
static void
@@ -958,13 +962,14 @@ tcp_cc_congestion_undo (tcp_connection_t * tc)
if (tcp_in_recovery (tc))
tcp_cc_recovery_exit (tc);
ASSERT (tc->rto_boff == 0);
+ TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 5);
/* TODO extend for fastrecovery */
}
static u8
tcp_cc_is_spurious_retransmit (tcp_connection_t * tc)
{
- return (tcp_in_recovery (tc)
+ return (tcp_in_recovery (tc) && tc->rto_boff == 1
&& tc->snd_rxt_ts
&& tcp_opts_tstamp (&tc->rcv_opts)
&& timestamp_lt (tc->rcv_opts.tsecr, tc->snd_rxt_ts));
@@ -988,7 +993,6 @@ tcp_cc_recover (tcp_connection_t * tc)
ASSERT (tc->rto_boff == 0);
ASSERT (!tcp_in_cong_recovery (tc));
ASSERT (tcp_scoreboard_is_sane_post_recovery (tc));
- TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
return 0;
}
@@ -2115,7 +2119,6 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
new_tc0->flags |= TCP_CONN_SNDACK;
/* Update rtt with the syn-ack sample */
- new_tc0->bytes_acked = 1;
tcp_update_rtt (new_tc0, vnet_buffer (b0)->tcp.ack_number);
TCP_EVT_DBG (TCP_EVT_SYNACK_RCVD, new_tc0);
}
@@ -2352,7 +2355,6 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
/* Update rtt and rto */
- tc0->bytes_acked = 1;
tcp_update_rtt (tc0, vnet_buffer (b0)->tcp.ack_number);
/* Switch state to ESTABLISHED */
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index be29f05f65c..cb1fcc9ab49 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -918,7 +918,24 @@ tcp_send_reset (tcp_connection_t * tc)
opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
ASSERT (opts_write_len == tc->snd_opts_len);
vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
- tcp_enqueue_to_output_now (vm, b, bi, tc->c_is_ip4);
+ if (tc->c_is_ip4)
+ {
+ ip4_header_t *ih4;
+ ih4 = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip.ip4,
+ &tc->c_rmt_ip.ip4, IP_PROTOCOL_TCP, 0);
+ th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
+ }
+ else
+ {
+ int bogus = ~0;
+ ip6_header_t *ih6;
+ ih6 = vlib_buffer_push_ip6 (vm, b, &tc->c_lcl_ip.ip6,
+ &tc->c_rmt_ip.ip6, IP_PROTOCOL_TCP);
+ th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
+ ASSERT (!bogus);
+ }
+ tcp_enqueue_to_ip_lookup_now (vm, b, bi, tc->c_is_ip4);
+ TCP_EVT_DBG (TCP_EVT_RST_SENT, tc);
}
void
@@ -1324,7 +1341,7 @@ tcp_rtx_timeout_cc (tcp_connection_t * tc)
tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss);
tc->cwnd = tcp_loss_wnd (tc);
tc->snd_congestion = tc->snd_una_max;
-
+ tc->rtt_ts = 0;
tcp_recovery_on (tc);
}