From eedc74b804a955de39e013f14c97f2abb4770157 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Fri, 31 Jul 2020 12:32:40 -0700 Subject: tcp: improve timestamp rtt accuracy - switch to using vlib_time as reference for timestamps - use us precision ticks for tcp but keep using ms precision for timestamps. As a result, srtt, rttvar and rto are now measured in us instead of ms. MRTT samples from timestamps are converted from ms to us (not accurate under ms) while high precision samples are used with us precision, i.e., they're no longer converted to ms precision samples. Type: improvement Change-Id: Ibda559575d9b4fdc85b0985264f7c865ff367e34 Signed-off-by: Florin Coras --- src/vnet/tcp/tcp.c | 7 +------ src/vnet/tcp/tcp.h | 4 +--- src/vnet/tcp/tcp_cli.c | 9 +++++---- src/vnet/tcp/tcp_inlines.h | 4 +--- src/vnet/tcp/tcp_input.c | 43 +++++++++++++++++++++---------------------- src/vnet/tcp/tcp_types.h | 14 +++++++++----- 6 files changed, 38 insertions(+), 43 deletions(-) (limited to 'src/vnet') diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index c30a69304bc..938a863238f 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -681,7 +681,7 @@ tcp_init_snd_vars (tcp_connection_t * tc) tc->snd_una = tc->iss; tc->snd_nxt = tc->iss + 1; tc->snd_una_max = tc->snd_nxt; - tc->srtt = 100; /* 100 ms */ + tc->srtt = 0.1 * THZ; /* 100 ms */ if (!tcp_cfg.csum_offload) tc->cfg_flags |= TCP_CFG_F_NO_CSUM_OFFLOAD; @@ -1361,11 +1361,6 @@ tcp_main_enable (vlib_main_t * vm) pool_init_fixed (tm->half_open_connections, tcp_cfg.preallocated_half_open_connections); - /* Initialize clocks per tick for TCP timestamp. Used to compute - * monotonically increasing timestamps. */ - tm->tstamp_ticks_per_clock = vm->clib_time.seconds_per_clock - / TCP_TSTAMP_RESOLUTION; - if (num_threads > 1) { clib_spinlock_init (&tm->half_open_lock); diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 91783a6e025..bc6e353b60e 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -91,7 +91,7 @@ typedef struct tcp_worker_ctx_ /** convenience pointer to this thread's vlib main */ vlib_main_t *vm; - /** worker time */ + /** Time measured in @ref TCP_TSTAMP_TICK used for time stamps */ u32 time_now; /* Max timers to be handled per dispatch loop */ @@ -209,8 +209,6 @@ typedef struct _tcp_main /* Pool of listeners. */ tcp_connection_t *listener_pool; - f64 tstamp_ticks_per_clock; - /** vlib buffer size */ u32 bytes_per_buffer; diff --git a/src/vnet/tcp/tcp_cli.c b/src/vnet/tcp/tcp_cli.c index b76b40474a6..6030440435d 100644 --- a/src/vnet/tcp/tcp_cli.c +++ b/src/vnet/tcp/tcp_cli.c @@ -205,10 +205,11 @@ format_tcp_vars (u8 * s, va_list * args) tc->rcv_opts.tsecr, tc->tsecr_last_ack, tcp_time_now () - tc->tsval_recent_age); s = format (s, " snd_mss %u\n", tc->snd_mss); - s = format (s, " rto %u rto_boff %u srtt %u us %.3f rttvar %u rtt_ts %.4f", - tc->rto, tc->rto_boff, tc->srtt, tc->mrtt_us * 1000, tc->rttvar, - tc->rtt_ts); - s = format (s, " rtt_seq %u\n", tc->rtt_seq - tc->iss); + s = format (s, " rto %u rto_boff %u srtt %.1f us %.3f rttvar %.1f", + tc->rto / 1000, tc->rto_boff, tc->srtt / 1000.0, + tc->mrtt_us * 1e3, tc->rttvar / 1000.0); + s = format (s, " rtt_ts %.4f rtt_seq %u\n", tc->rtt_ts, + tc->rtt_seq - tc->iss); s = format (s, " next_node %u opaque 0x%x fib_index %u\n", tc->next_node_index, tc->next_node_opaque, tc->c_fib_index); s = format (s, " cong: %U", format_tcp_congestion, tc); diff --git a/src/vnet/tcp/tcp_inlines.h b/src/vnet/tcp/tcp_inlines.h index c4b155aa499..cb00ca4a97b 100644 --- a/src/vnet/tcp/tcp_inlines.h +++ b/src/vnet/tcp/tcp_inlines.h @@ -218,9 +218,7 @@ tcp_time_now_us (u32 thread_index) always_inline u32 tcp_set_time_now (tcp_worker_ctx_t * wrk) { - tcp_main_t *tm = &tcp_main; - wrk->time_now = (u64) (clib_cpu_time_now () * tm->tstamp_ticks_per_clock); - return wrk->time_now; + return wrk->time_now = (u64) (vlib_time_now (wrk->vm) * TCP_TSTP_HZ); } always_inline tcp_connection_t * diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 519219e2c74..5fa7bf23c72 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -424,31 +424,26 @@ acceptable: /** * Compute smoothed RTT as per VJ's '88 SIGCOMM and RFC6298 * - * Note that although the original article, srtt and rttvar are scaled + * Note that although in the original article srtt and rttvar are scaled * to minimize round-off errors, here we don't. Instead, we rely on * better precision time measurements. + * + * A known limitation of the algorithm is that a drop in rtt results in a + * rttvar increase and bigger RTO. + * + * mrtt must be provided in @ref TCP_TICK multiples, i.e., in us. Note that + * timestamps are measured as ms ticks so they must be converted before + * calling this function. */ static void tcp_estimate_rtt (tcp_connection_t * tc, u32 mrtt) { int err, diff; - if (tc->srtt != 0) - { - err = mrtt - tc->srtt; - - /* XXX Drop in RTT results in RTTVAR increase and bigger RTO. - * The increase should be bound */ - tc->srtt = clib_max ((int) tc->srtt + (err >> 3), 1); - diff = (clib_abs (err) - (int) tc->rttvar) >> 2; - tc->rttvar = clib_max ((int) tc->rttvar + diff, 1); - } - else - { - /* First measurement. */ - tc->srtt = mrtt; - tc->rttvar = mrtt >> 1; - } + err = mrtt - tc->srtt; + tc->srtt = clib_max ((int) tc->srtt + (err >> 3), 1); + diff = (clib_abs (err) - (int) tc->rttvar) >> 2; + tc->rttvar = clib_max ((int) tc->rttvar + diff, 1); } static inline void @@ -506,8 +501,8 @@ tcp_update_rtt (tcp_connection_t * tc, tcp_rate_sample_t * rs, u32 ack) * seq_lt (tc->snd_una, ack). This is a condition for calling update_rtt */ else if (tcp_opts_tstamp (&tc->rcv_opts) && tc->rcv_opts.tsecr) { - u32 now = tcp_tstamp (tc); - mrtt = clib_max (now - tc->rcv_opts.tsecr, 1); + mrtt = clib_max (tcp_tstamp (tc) - tc->rcv_opts.tsecr, 1); + mrtt *= TCP_TSTP_TO_HZ; } estimate_rtt: @@ -543,8 +538,8 @@ tcp_estimate_initial_rtt (tcp_connection_t * tc) } else { - mrtt = tcp_time_now_w_thread (thread_index) - tc->rcv_opts.tsecr; - mrtt = clib_max (mrtt, 1); + mrtt = tcp_tstamp (tc) - tc->rcv_opts.tsecr; + mrtt = clib_max (mrtt, 1) * TCP_TSTP_TO_HZ; /* Due to retransmits we don't know the initial mrtt */ if (tc->rto_boff && mrtt > 1 * THZ) mrtt = 1 * THZ; @@ -552,7 +547,11 @@ tcp_estimate_initial_rtt (tcp_connection_t * tc) } if (mrtt > 0 && mrtt < TCP_RTT_MAX) - tcp_estimate_rtt (tc, mrtt); + { + /* First measurement as per RFC 6298 */ + tc->srtt = mrtt; + tc->rttvar = mrtt >> 1; + } tcp_update_rto (tc); } diff --git a/src/vnet/tcp/tcp_types.h b/src/vnet/tcp/tcp_types.h index 3cf4e9e33ef..d7bcac5e3bc 100644 --- a/src/vnet/tcp/tcp_types.h +++ b/src/vnet/tcp/tcp_types.h @@ -22,10 +22,14 @@ #include #include -#define TCP_TICK 0.001 /**< TCP tick period (s) */ -#define THZ (u32) (1/TCP_TICK) /**< TCP tick frequency */ -#define TCP_TSTAMP_RESOLUTION TCP_TICK /**< Time stamp resolution */ -#define TCP_PAWS_IDLE 24 * 24 * 60 * 60 * THZ /**< 24 days */ +#define TCP_TICK 0.000001 /**< TCP tick period (s) */ +#define THZ (u32) (1/TCP_TICK) /**< TCP tick frequency */ + +#define TCP_TSTP_TICK 0.001 /**< Timestamp tick (s) */ +#define TCP_TSTP_HZ (u32) (1/TCP_TSTP_TICK) /**< Timestamp freq */ +#define TCP_PAWS_IDLE (24 * 86400 * TCP_TSTP_HZ)/**< 24 days */ +#define TCP_TSTP_TO_HZ (u32) (TCP_TSTP_TICK * THZ) + #define TCP_FIB_RECHECK_PERIOD 1 * THZ /**< Recheck every 1s */ #define TCP_MAX_OPTION_SPACE 40 #define TCP_CC_DATA_SZ 24 @@ -355,7 +359,7 @@ typedef struct _tcp_connection /* RTT and RTO */ u32 rto; /**< Retransmission timeout */ u32 rto_boff; /**< Index for RTO backoff */ - u32 srtt; /**< Smoothed RTT */ + u32 srtt; /**< Smoothed RTT measured in @ref TCP_TICK */ u32 rttvar; /**< Smoothed mean RTT difference. Approximates variance */ u32 rtt_seq; /**< Sequence number for tracked ACK */ f64 rtt_ts; /**< Timestamp for tracked ACK */ -- cgit 1.2.3-korg