aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/tcp
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet/tcp')
-rw-r--r--src/vnet/tcp/tcp.c44
-rw-r--r--src/vnet/tcp/tcp.h23
-rw-r--r--src/vnet/tcp/tcp_input.c22
-rw-r--r--src/vnet/tcp/tcp_output.c2
4 files changed, 72 insertions, 19 deletions
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index cb05b8c0533..626b49997ac 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -555,6 +555,16 @@ tcp_init_snd_vars (tcp_connection_t * tc)
tc->snd_una_max = tc->snd_nxt;
}
+void
+tcp_enable_pacing (tcp_connection_t * tc)
+{
+ u32 max_burst, byte_rate;
+ max_burst = 16 * tc->snd_mss;
+ byte_rate = 2 << 16;
+ transport_connection_tx_pacer_init (&tc->connection, byte_rate, max_burst);
+ tc->mrtt_us = (u32) ~ 0;
+}
+
/** Initialize tcp connection variables
*
* Should be called after having received a msg from the peer, i.e., a SYN or
@@ -572,7 +582,11 @@ tcp_connection_init_vars (tcp_connection_t * tc)
if (!tc->c_is_ip4 && ip6_address_is_link_local_unicast (&tc->c_rmt_ip6))
tcp_add_del_adjacency (tc, 1);
- // tcp_connection_fib_attach (tc);
+ /* tcp_connection_fib_attach (tc); */
+
+ if (transport_connection_is_tx_paced (&tc->connection)
+ || tcp_main.tx_pacing)
+ tcp_enable_pacing (tc);
}
static int
@@ -784,14 +798,19 @@ format_tcp_vars (u8 * s, va_list * args)
s = format (s, " limited_transmit %u\n", tc->limited_transmit - tc->iss);
s = format (s, " tsecr %u tsecr_last_ack %u\n", tc->rcv_opts.tsecr,
tc->tsecr_last_ack);
- s = format (s, " rto %u rto_boff %u srtt %u rttvar %u rtt_ts %u ", tc->rto,
- tc->rto_boff, tc->srtt, tc->rttvar, tc->rtt_ts);
+ s = format (s, " rto %u rto_boff %u srtt %u rttvar %u rtt_ts %2.5f ",
+ tc->rto, tc->rto_boff, tc->srtt, tc->rttvar, tc->rtt_ts);
s = format (s, "rtt_seq %u\n", tc->rtt_seq);
s = format (s, " tsval_recent %u tsval_recent_age %u\n", tc->tsval_recent,
tcp_time_now () - tc->tsval_recent_age);
if (tc->state >= TCP_STATE_ESTABLISHED)
- s = format (s, " scoreboard: %U\n", format_tcp_scoreboard, &tc->sack_sb,
- tc);
+ {
+ s = format (s, " scoreboard: %U\n", format_tcp_scoreboard, &tc->sack_sb,
+ tc);
+ if (transport_connection_is_tx_paced (&tc->connection))
+ s = format (s, " pacer: %U\n", format_transport_pacer,
+ &tc->connection.pacer);
+ }
if (vec_len (tc->snd_sacks))
s = format (s, " sacks tx: %U\n", format_tcp_sacks, tc);
@@ -1129,6 +1148,19 @@ const static transport_proto_vft_t tcp_proto = {
};
/* *INDENT-ON* */
+void
+tcp_update_pacer (tcp_connection_t * tc)
+{
+ f64 srtt;
+
+ if (!transport_connection_is_tx_paced (&tc->connection))
+ return;
+
+ srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us);
+ transport_connection_tx_pacer_update (&tc->connection,
+ ((f64) tc->cwnd) / srtt);
+}
+
static void
tcp_timer_keep_handler (u32 conn_index)
{
@@ -1408,6 +1440,8 @@ tcp_config_fn (vlib_main_t * vm, unformat_input_t * input)
else if (unformat (input, "max-rx-fifo %U", unformat_memory_size,
&tm->max_rx_fifo))
;
+ else if (unformat (input, "tx-pacing"))
+ tm->tx_pacing = 1;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index a036072e546..4ba3d5e9d87 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -160,7 +160,7 @@ enum
};
#define TCP_SCOREBOARD_TRACE (0)
-#define TCP_MAX_SACK_BLOCKS 15 /**< Max number of SACK blocks stored */
+#define TCP_MAX_SACK_BLOCKS 32 /**< Max number of SACK blocks stored */
#define TCP_INVALID_SACK_HOLE_INDEX ((u32)~0)
typedef struct _scoreboard_trace_elt
@@ -319,8 +319,9 @@ typedef struct _tcp_connection
u32 rto_boff; /**< Index for RTO backoff */
u32 srtt; /**< Smoothed RTT */
u32 rttvar; /**< Smoothed mean RTT difference. Approximates variance */
- u32 rtt_ts; /**< Timestamp for tracked ACK */
u32 rtt_seq; /**< Sequence number for tracked ACK */
+ f64 rtt_ts; /**< Timestamp for tracked ACK */
+ f64 mrtt_us; /**< High precision mrtt from tracked acks */
u16 mss; /**< Our max seg size that includes options */
u32 limited_transmit; /**< snd_nxt when limited transmit starts */
@@ -444,6 +445,9 @@ typedef struct _tcp_main
u32 last_v6_address_rotor;
ip6_address_t *ip6_src_addresses;
+ /** Enable tx pacing for new connections */
+ u8 tx_pacing;
+
u8 punt_unknown4;
u8 punt_unknown6;
@@ -692,6 +696,12 @@ tcp_time_now (void)
return tcp_main.wrk_ctx[vlib_get_thread_index ()].time_now;
}
+always_inline f64
+tcp_time_now_us (u32 thread_index)
+{
+ return transport_time_now (thread_index);
+}
+
always_inline u32
tcp_set_time_now (u32 thread_index)
{
@@ -706,6 +716,15 @@ void tcp_connection_timers_init (tcp_connection_t * tc);
void tcp_connection_timers_reset (tcp_connection_t * tc);
void tcp_init_snd_vars (tcp_connection_t * tc);
void tcp_connection_init_vars (tcp_connection_t * tc);
+void tcp_update_pacer (tcp_connection_t * tc);
+
+always_inline void
+tcp_cc_rcv_ack (tcp_connection_t * tc)
+{
+ tc->cc_algo->rcv_ack (tc);
+ tcp_update_pacer (tc);
+ tc->tsecr_last_ack = tc->rcv_opts.tsecr;
+}
always_inline void
tcp_connection_force_ack (tcp_connection_t * tc, vlib_buffer_t * b)
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index 39a538ba681..ac0e996567e 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -462,14 +462,15 @@ tcp_update_rtt (tcp_connection_t * tc, u32 ack)
if (tc->rtt_ts && seq_geq (ack, tc->rtt_seq))
{
- mrtt = tcp_time_now () - tc->rtt_ts;
+ tc->mrtt_us = tcp_time_now_us (tc->c_thread_index) - tc->rtt_ts;
+ mrtt = clib_max ((u32) (tc->mrtt_us * THZ), 1);
}
/* As per RFC7323 TSecr can be used for RTTM only if the segment advances
* snd_una, i.e., the left side of the send window:
* seq_lt (tc->snd_una, ack). This is a condition for calling update_rtt */
else if (tcp_opts_tstamp (&tc->rcv_opts) && tc->rcv_opts.tsecr)
{
- mrtt = tcp_time_now () - tc->rcv_opts.tsecr;
+ mrtt = clib_max (tcp_time_now () - tc->rcv_opts.tsecr, 1);
}
/* Ignore dubious measurements */
@@ -1079,12 +1080,14 @@ tcp_cc_fastrecovery_exit (tcp_connection_t * tc)
tc->snd_nxt = tc->snd_una_max;
tc->snd_rxt_bytes = 0;
- /* HACK: since we don't have an output pacer, force slow start */
- tc->cwnd = 20 * tc->snd_mss;
-
tcp_fastrecovery_off (tc);
tcp_fastrecovery_1_smss_off (tc);
tcp_fastrecovery_first_off (tc);
+
+ /* Update pacer because our cwnd changed. Also makes sure
+ * that we recompute the max burst size */
+ tcp_update_pacer (tc);
+
TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
}
@@ -1153,8 +1156,7 @@ tcp_cc_update (tcp_connection_t * tc, vlib_buffer_t * b)
ASSERT (!tcp_in_cong_recovery (tc) || tcp_is_lost_fin (tc));
/* Congestion avoidance */
- tc->cc_algo->rcv_ack (tc);
- tc->tsecr_last_ack = tc->rcv_opts.tsecr;
+ tcp_cc_rcv_ack (tc);
/* If a cumulative ack, make sure dupacks is 0 */
tc->rcv_dupacks = 0;
@@ -1372,8 +1374,7 @@ partial_ack:
tc->snd_nxt = tc->snd_una_max;
/* Treat as congestion avoidance ack */
- tc->cc_algo->rcv_ack (tc);
- tc->tsecr_last_ack = tc->rcv_opts.tsecr;
+ tcp_cc_rcv_ack (tc);
return;
}
@@ -1391,8 +1392,7 @@ partial_ack:
/* Post RTO timeout don't try anything fancy */
if (tcp_in_recovery (tc))
{
- tc->cc_algo->rcv_ack (tc);
- tc->tsecr_last_ack = tc->rcv_opts.tsecr;
+ tcp_cc_rcv_ack (tc);
transport_add_tx_event (&tc->connection);
return;
}
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 2e6036b410a..f14a61263d4 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -1202,7 +1202,7 @@ tcp_push_header (tcp_connection_t * tc, vlib_buffer_t * b)
/* If not tracking an ACK, start tracking */
if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))
{
- tc->rtt_ts = tcp_time_now ();
+ tc->rtt_ts = tcp_time_now_us (tc->c_thread_index);
tc->rtt_seq = tc->snd_nxt;
}
if (PREDICT_FALSE (!tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)))